From 54f3afe3d79be9556cbc22fe83cd1a142cf401e4 Mon Sep 17 00:00:00 2001 From: Jean Christophe Roques Date: Wed, 29 May 2024 15:25:50 +0200 Subject: [PATCH] add agent client server add engine client server use of configured logger process uses po::split_unix to split command line add tests add agent_check_result_builder --- agent/CMakeLists.txt | 5 + agent/doc/agent-doc.md | 5 +- agent/inc/com/centreon/agent/bireactor.hh | 84 +++ agent/inc/com/centreon/agent/check.hh | 19 +- agent/inc/com/centreon/agent/check_exec.hh | 51 +- agent/inc/com/centreon/agent/scheduler.hh | 43 +- .../com/centreon/agent/streaming_client.hh | 109 ++++ .../com/centreon/agent/streaming_server.hh | 73 +++ agent/proto/agent.proto | 48 +- agent/src/bireactor.cc | 207 ++++++++ agent/src/check.cc | 44 +- agent/src/check_exec.cc | 72 ++- agent/src/main.cc | 86 +++- agent/src/scheduler.cc | 129 +++-- agent/src/streaming_client.cc | 229 +++++++++ agent/src/streaming_server.cc | 237 +++++++++ agent/test/CMakeLists.txt | 4 +- agent/test/check_exec_test.cc | 15 +- agent/test/check_test.cc | 9 +- agent/test/scheduler_test.cc | 165 +++--- broker/CMakeLists.txt | 2 + .../com/centreon/common/grpc/grpc_config.hh | 31 ++ common/inc/com/centreon/common/process.hh | 24 +- common/src/perfdata.cc | 4 - common/src/process.cc | 4 +- common/tests/CMakeLists.txt | 2 + engine/modules/opentelemetry/CMakeLists.txt | 5 + .../agent_check_result_builder.hh | 117 +++++ .../centreon_agent/agent_config.hh | 77 +++ .../centreon_agent/agent_impl.hh | 80 +++ .../centreon_agent/agent_reverse_client.hh | 63 +++ .../centreon_agent/agent_service.hh | 71 +++ .../centreon_agent/to_agent_connector.hh | 79 +++ .../modules/opentelemetry/conf_helper.hh | 100 ++++ .../modules/opentelemetry/grpc_config.hh | 8 + .../modules/opentelemetry/open_telemetry.hh | 6 +- .../opentelemetry/otl_check_result_builder.hh | 5 +- .../modules/opentelemetry/otl_config.hh | 9 + .../modules/opentelemetry/otl_data_point.hh | 15 + 
.../engine/modules/opentelemetry/otl_fmt.hh | 66 +++ .../modules/opentelemetry/otl_server.hh | 26 +- .../opentelemetry/telegraf/conf_server.hh | 2 +- .../telegraf/nagios_check_result_builder.hh | 1 + .../opentelemetry/precomp_inc/precomp.hh | 1 + .../agent_check_result_builder.cc | 168 ++++++ .../src/centreon_agent/agent_config.cc | 125 +++++ .../src/centreon_agent/agent_impl.cc | 357 +++++++++++++ .../centreon_agent/agent_reverse_client.cc | 130 +++++ .../src/centreon_agent/agent_service.cc | 152 ++++++ .../src/centreon_agent/to_agent_connector.cc | 223 ++++++++ .../opentelemetry/src/open_telemetry.cc | 40 +- .../src/otl_check_result_builder.cc | 11 + .../modules/opentelemetry/src/otl_config.cc | 55 +- .../opentelemetry/src/otl_data_point.cc | 13 + .../modules/opentelemetry/src/otl_server.cc | 32 +- .../opentelemetry/src/telegraf/conf_server.cc | 24 +- engine/precomp_inc/precomp.hh | 1 + engine/src/service.cc | 34 +- engine/tests/CMakeLists.txt | 8 +- .../agent_check_result_builder_test.cc | 482 ++++++++++++++++++ .../agent_reverse_client_test.cc | 154 ++++++ .../opentelemetry/agent_to_engine_test.cc | 316 ++++++++++++ .../opentelemetry/open_telemetry_test.cc | 4 +- engine/tests/opentelemetry/otl_server_test.cc | 14 +- engine/tests/test_engine.cc | 18 +- engine/tests/test_engine.hh | 11 +- 66 files changed, 4391 insertions(+), 413 deletions(-) create mode 100644 agent/inc/com/centreon/agent/bireactor.hh create mode 100644 agent/inc/com/centreon/agent/streaming_client.hh create mode 100644 agent/inc/com/centreon/agent/streaming_server.hh create mode 100644 agent/src/bireactor.cc create mode 100644 agent/src/streaming_client.cc create mode 100644 agent/src/streaming_server.cc create mode 100644 engine/modules/opentelemetry/inc/com/centreon/engine/modules/opentelemetry/centreon_agent/agent_check_result_builder.hh create mode 100644 engine/modules/opentelemetry/inc/com/centreon/engine/modules/opentelemetry/centreon_agent/agent_config.hh create mode 100644 
engine/modules/opentelemetry/inc/com/centreon/engine/modules/opentelemetry/centreon_agent/agent_reverse_client.hh create mode 100644 engine/modules/opentelemetry/inc/com/centreon/engine/modules/opentelemetry/centreon_agent/agent_service.hh create mode 100644 engine/modules/opentelemetry/inc/com/centreon/engine/modules/opentelemetry/centreon_agent/to_agent_connector.hh create mode 100644 engine/modules/opentelemetry/inc/com/centreon/engine/modules/opentelemetry/conf_helper.hh create mode 100644 engine/modules/opentelemetry/src/centreon_agent/agent_check_result_builder.cc create mode 100644 engine/modules/opentelemetry/src/centreon_agent/agent_config.cc create mode 100644 engine/modules/opentelemetry/src/centreon_agent/agent_reverse_client.cc create mode 100644 engine/modules/opentelemetry/src/centreon_agent/agent_service.cc create mode 100644 engine/modules/opentelemetry/src/centreon_agent/to_agent_connector.cc create mode 100644 engine/tests/opentelemetry/agent_check_result_builder_test.cc create mode 100644 engine/tests/opentelemetry/agent_reverse_client_test.cc create mode 100644 engine/tests/opentelemetry/agent_to_engine_test.cc diff --git a/agent/CMakeLists.txt b/agent/CMakeLists.txt index d16ff95b37d..012c223aa98 100644 --- a/agent/CMakeLists.txt +++ b/agent/CMakeLists.txt @@ -100,6 +100,7 @@ add_custom_command( add_library(centreon_agent_lib STATIC ${SRC_DIR}/agent.grpc.pb.cc ${SRC_DIR}/agent.pb.cc + ${SRC_DIR}/bireactor.cc ${SRC_DIR}/check.cc ${SRC_DIR}/check_exec.cc ${SRC_DIR}/opentelemetry/proto/collector/metrics/v1/metrics_service.grpc.pb.cc @@ -108,12 +109,15 @@ add_library(centreon_agent_lib STATIC ${SRC_DIR}/opentelemetry/proto/common/v1/common.pb.cc ${SRC_DIR}/opentelemetry/proto/resource/v1/resource.pb.cc ${SRC_DIR}/scheduler.cc + ${SRC_DIR}/streaming_client.cc + ${SRC_DIR}/streaming_server.cc ) include_directories( ${INCLUDE_DIR} ${SRC_DIR} ${CMAKE_SOURCE_DIR}/common/inc + ${CMAKE_SOURCE_DIR}/common/grpc/inc ) 
target_precompile_headers(centreon_agent_lib PRIVATE precomp_inc/precomp.hh) @@ -128,6 +132,7 @@ target_link_libraries( # berpc centreon_agent_lib centreon_common + centreon_grpc -L${Boost_LIBRARY_DIR_RELEASE} boost_program_options fmt::fmt) diff --git a/agent/doc/agent-doc.md b/agent/doc/agent-doc.md index 49d710639f0..f8c167ab93b 100644 --- a/agent/doc/agent-doc.md +++ b/agent/doc/agent-doc.md @@ -3,11 +3,12 @@ ## Introduction The goal of this program is to execute checks in both windows and linux OS -It's full asynchronous, excepted grpc layers, it's single threaded and you won't find mutex in code. +It's full asynchronous, excepted grpc layers, it's single threaded and you won't find mutex in non grpc code. +This is why when we receive request, we post it to asio in order to process it in the main thread. ## Configuration configuration is given by Engine by a AgentConfiguration sent over grpc -The configuration object is embedded in EngineToAgent::config +The configuration object is embedded in MessageToAgent::config ## Scheduler We trie to spread checks over check_period. diff --git a/agent/inc/com/centreon/agent/bireactor.hh b/agent/inc/com/centreon/agent/bireactor.hh new file mode 100644 index 00000000000..77c0123b411 --- /dev/null +++ b/agent/inc/com/centreon/agent/bireactor.hh @@ -0,0 +1,84 @@ +/** + * Copyright 2024 Centreon + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + * For more information : contact@centreon.com + */ + +#ifndef CENTREON_AGENT_BIREACTOR_HH +#define CENTREON_AGENT_BIREACTOR_HH + +#include "agent.grpc.pb.h" + +namespace com::centreon::agent { + +template +class bireactor + : public bireactor_class, + public std::enable_shared_from_this> { + private: + static std::set> _instances; + static std::mutex _instances_m; + + bool _write_pending; + std::deque> _write_queue; + std::shared_ptr _read_current; + + const std::string_view _class_name; + + const std::string _peer; + + protected: + std::shared_ptr _io_context; + std::shared_ptr _logger; + + bool _alive; + mutable std::mutex _protect; + + public: + bireactor(const std::shared_ptr& io_context, + const std::shared_ptr& logger, + const std::string_view& class_name, + const std::string& peer); + + virtual ~bireactor(); + + static void register_stream(const std::shared_ptr& strm); + + void start_read(); + + void start_write(); + void write(const std::shared_ptr& request); + + // bireactor part + void OnReadDone(bool ok) override; + + virtual void on_incomming_request( + const std::shared_ptr& request) = 0; + + virtual void on_error() = 0; + + void OnWriteDone(bool ok) override; + + // server version + void OnDone(); + // client version + void OnDone(const ::grpc::Status& /*s*/); + + virtual void shutdown(); +}; + +} // namespace com::centreon::agent + +#endif diff --git a/agent/inc/com/centreon/agent/check.hh b/agent/inc/com/centreon/agent/check.hh index e3e6f34bb11..06cf49910d2 100644 --- a/agent/inc/com/centreon/agent/check.hh +++ b/agent/inc/com/centreon/agent/check.hh @@ -25,7 +25,7 @@ namespace com::centreon::agent { using engine_to_agent_request_ptr = - std::shared_ptr; + std::shared_ptr; using time_point = std::chrono::system_clock::time_point; using duration = std::chrono::system_clock::duration; @@ -46,7 +46,6 @@ class check : public std::enable_shared_from_this { private: time_point _start_expected; - const std::string& _host; const std::string& 
_service; const std::string& _command_name; const std::string& _command_line; @@ -77,26 +76,14 @@ class check : public std::enable_shared_from_this { public: using pointer = std::shared_ptr; - template check(const std::shared_ptr& io_context, const std::shared_ptr& logger, time_point exp, - const std::string& hst, const std::string& serv, const std::string& command_name, const std::string& cmd_line, const engine_to_agent_request_ptr& cnf, - handler_type&& handler) - : _start_expected(exp), - _host(hst), - _service(serv), - _command_name(command_name), - _command_line(cmd_line), - _conf(cnf), - _io_context(io_context), - _logger(logger), - _time_out_timer(*io_context), - _completion_handler(handler) {} + completion_handler&& handler); virtual ~check() = default; @@ -111,8 +98,6 @@ class check : public std::enable_shared_from_this { time_point get_start_expected() const { return _start_expected; } - const std::string& get_host() const { return _host; } - const std::string& get_service() const { return _service; } const std::string& get_command_name() const { return _command_name; } diff --git a/agent/inc/com/centreon/agent/check_exec.hh b/agent/inc/com/centreon/agent/check_exec.hh index 151682029fe..42107040c4a 100644 --- a/agent/inc/com/centreon/agent/check_exec.hh +++ b/agent/inc/com/centreon/agent/check_exec.hh @@ -88,75 +88,30 @@ class check_exec : public check { void _init(); public: - template check_exec(const std::shared_ptr& io_context, const std::shared_ptr& logger, time_point exp, - const std::string& hst, const std::string& serv, const std::string& cmd_name, const std::string& cmd_line, const engine_to_agent_request_ptr& cnf, - handler_type&& handler) - : check(io_context, - logger, - exp, - hst, - serv, - cmd_name, - cmd_line, - cnf, - handler) {} - - template + check::completion_handler&& handler); + static std::shared_ptr load( const std::shared_ptr& io_context, const std::shared_ptr& logger, time_point exp, - const std::string& hst, const std::string& 
serv, const std::string& cmd_name, const std::string& cmd_line, const engine_to_agent_request_ptr& cnf, - handler_type&& handler); + check::completion_handler&& handler); void start_check(const duration& timeout) override; void on_completion(unsigned running_index); }; -/** - * @brief create and initialize a check_exec object (don't use constructor) - * - * @tparam handler_type - * @param io_context - * @param logger - * @param exp start expected - * @param hst - * @param serv - * @param cmd_name - * @param cmd_line - * @param cnf agent configuration - * @param handler completion handler - * @return std::shared_ptr - */ -template -std::shared_ptr check_exec::load( - const std::shared_ptr& io_context, - const std::shared_ptr& logger, - time_point exp, - const std::string& hst, - const std::string& serv, - const std::string& cmd_name, - const std::string& cmd_line, - const engine_to_agent_request_ptr& cnf, - handler_type&& handler) { - std::shared_ptr ret = std::make_shared( - io_context, logger, exp, hst, serv, cmd_name, cmd_line, cnf, handler); - ret->_init(); - return ret; -} - } // namespace com::centreon::agent #endif diff --git a/agent/inc/com/centreon/agent/scheduler.hh b/agent/inc/com/centreon/agent/scheduler.hh index dc9b030b575..bee50593cb6 100644 --- a/agent/inc/com/centreon/agent/scheduler.hh +++ b/agent/inc/com/centreon/agent/scheduler.hh @@ -23,10 +23,6 @@ namespace com::centreon::agent { -using export_metric_request = - ::opentelemetry::proto::collector::metrics::v1::ExportMetricsServiceRequest; -using export_metric_request_ptr = std::shared_ptr; - /** * @brief the core of the agent * It has to create check object with chck_builder passed in parameter of load @@ -35,12 +31,12 @@ using export_metric_request_ptr = std::shared_ptr; */ class scheduler : public std::enable_shared_from_this { public: - using metric_sender = std::function; + using metric_sender = + std::function&)>; using check_builder = std::function( const std::shared_ptr&, const 
std::shared_ptr& /*logger*/, time_point /* start expected*/, - const std::string& /*host*/, const std::string& /*service*/, const std::string& /*cmd_name*/, const std::string& /*cmd_line*/, @@ -55,7 +51,7 @@ class scheduler : public std::enable_shared_from_this { bool _alive = true; // request that will be sent to engine - export_metric_request_ptr _current_request; + std::shared_ptr _current_request; struct scope_metric_request { ::opentelemetry::proto::metrics::v1::ScopeMetrics* scope_metric; @@ -64,11 +60,11 @@ class scheduler : public std::enable_shared_from_this { metrics; }; - absl::flat_hash_map, scope_metric_request> - _host_serv_to_scope_metrics; + absl::flat_hash_map _serv_to_scope_metrics; std::shared_ptr _io_context; std::shared_ptr _logger; + std::string _supervised_host; metric_sender _metric_sender; asio::system_timer _send_timer; asio::system_timer _check_timer; @@ -95,14 +91,13 @@ class scheduler : public std::enable_shared_from_this { unsigned status, const std::list& perfdata, const std::list& outputs); - void _store_result_in_metrics_and_examplars( + void _store_result_in_metrics_and_exemplars( const check::pointer& check, unsigned status, const std::list& perfdata, const std::list& outputs); - scope_metric_request& _get_scope_metrics(const std::string& host, - const std::string& service); + scope_metric_request& _get_scope_metrics(const std::string& service); ::opentelemetry::proto::metrics::v1::Metric* _get_metric( scope_metric_request& scope_metric, @@ -127,23 +122,32 @@ class scheduler : public std::enable_shared_from_this { template scheduler(const std::shared_ptr& io_context, const std::shared_ptr& logger, - const std::shared_ptr& config, + const std::string& supervised_host, + const std::shared_ptr& config, sender&& met_sender, chck_builder&& builder); + scheduler(const scheduler&) = delete; + scheduler operator=(const scheduler&) = delete; + void update(const engine_to_agent_request_ptr& conf); - static std::shared_ptr default_config(); 
+ static std::shared_ptr default_config(); template static std::shared_ptr load( const std::shared_ptr& io_context, const std::shared_ptr& logger, - const std::shared_ptr& config, + const std::string& supervised_host, + const std::shared_ptr& config, sender&& met_sender, chck_builder&& chk_builder); void stop(); + + engine_to_agent_request_ptr get_last_message_to_agent() const { + return _conf; + } }; /** @@ -158,12 +162,14 @@ template scheduler::scheduler( const std::shared_ptr& io_context, const std::shared_ptr& logger, - const std::shared_ptr& config, + const std::string& supervised_host, + const std::shared_ptr& config, sender&& met_sender, chck_builder&& builder) : _metric_sender(met_sender), _io_context(io_context), _logger(logger), + _supervised_host(supervised_host), _send_timer(*io_context), _check_timer(*io_context), _check_builder(builder), @@ -181,11 +187,12 @@ template std::shared_ptr scheduler::load( const std::shared_ptr& io_context, const std::shared_ptr& logger, - const std::shared_ptr& config, + const std::string& supervised_host, + const std::shared_ptr& config, sender&& met_sender, chck_builder&& chk_builder) { std::shared_ptr to_start = std::make_shared( - io_context, logger, config, std::move(met_sender), + io_context, logger, supervised_host, config, std::move(met_sender), std::move(chk_builder)); to_start->_start(); return to_start; diff --git a/agent/inc/com/centreon/agent/streaming_client.hh b/agent/inc/com/centreon/agent/streaming_client.hh new file mode 100644 index 00000000000..28b5ff636fc --- /dev/null +++ b/agent/inc/com/centreon/agent/streaming_client.hh @@ -0,0 +1,109 @@ +/** + * Copyright 2024 Centreon + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * For more information : contact@centreon.com + */ + +#ifndef CENTREON_AGENT_STREAMING_CLIENT_HH +#define CENTREON_AGENT_STREAMING_CLIENT_HH + +#include "com/centreon/common/grpc/grpc_client.hh" + +#include "bireactor.hh" +#include "scheduler.hh" + +namespace com::centreon::agent { + +class streaming_client; + +class client_reactor + : public bireactor< + ::grpc::ClientBidiReactor> { + std::weak_ptr _parent; + ::grpc::ClientContext _context; + + public: + client_reactor(const std::shared_ptr& io_context, + const std::shared_ptr& logger, + const std::shared_ptr& parent, + const std::string& peer); + + std::shared_ptr shared_from_this() { + return std::static_pointer_cast( + bireactor<::grpc::ClientBidiReactor>:: + shared_from_this()); + } + + ::grpc::ClientContext& get_context() { return _context; } + + void on_incomming_request( + const std::shared_ptr& request) override; + + void on_error() override; + + void shutdown() override; +}; + +/** + * @brief this object not only manages connection to engine, but also embed + * check scheduler + * + */ +class streaming_client : public common::grpc::grpc_client_base, + public std::enable_shared_from_this { + std::shared_ptr _io_context; + std::shared_ptr _logger; + std::string _supervised_host; + + std::unique_ptr _stub; + + std::shared_ptr _reactor; + std::shared_ptr _sched; + + std::mutex _protect; + + void _create_reactor(); + + void _start(); + + void _send(const std::shared_ptr& request); + + public: + streaming_client(const std::shared_ptr& io_context, + const std::shared_ptr& logger, + const 
std::shared_ptr& conf, + const std::string& supervised_host); + + static std::shared_ptr load( + const std::shared_ptr& io_context, + const std::shared_ptr& logger, + const std::shared_ptr& conf, + const std::string& supervised_host); + + void on_incomming_request(const std::shared_ptr& caller, + const std::shared_ptr& request); + void on_error(const std::shared_ptr& caller); + + void shutdown(); + + // use only for tests + engine_to_agent_request_ptr get_last_message_to_agent() const { + return _sched->get_last_message_to_agent(); + } +}; + +} // namespace com::centreon::agent + +#endif \ No newline at end of file diff --git a/agent/inc/com/centreon/agent/streaming_server.hh b/agent/inc/com/centreon/agent/streaming_server.hh new file mode 100644 index 00000000000..eba43886a3d --- /dev/null +++ b/agent/inc/com/centreon/agent/streaming_server.hh @@ -0,0 +1,73 @@ +/** + * Copyright 2024 Centreon + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + * For more information : contact@centreon.com + */ + +#ifndef CENTREON_AGENT_STREAMING_SERVER_HH +#define CENTREON_AGENT_STREAMING_SERVER_HH + +#include "com/centreon/common/grpc/grpc_server.hh" + +#include "bireactor.hh" +#include "scheduler.hh" + +namespace com::centreon::agent { + +class server_reactor; + +/** + * @brief grpc engine to agent server (reverse connection) + * It accept only one connection at a time + * If another connection occurs, previous connection is shutdown + * This object is both grpc server and grpc service + */ +class streaming_server : public common::grpc::grpc_server_base, + public std::enable_shared_from_this, + public ReversedAgentService::Service { + std::shared_ptr _io_context; + std::shared_ptr _logger; + std::string _supervised_host; + + /** active engine to agent connection*/ + std::shared_ptr _incoming; + + mutable std::mutex _protect; + + void _start(); + + public: + streaming_server(const std::shared_ptr& io_context, + const std::shared_ptr& logger, + const std::shared_ptr& conf, + const std::string& supervised_host); + + ~streaming_server(); + + static std::shared_ptr load( + const std::shared_ptr& io_context, + const std::shared_ptr& logger, + const std::shared_ptr& conf, + const std::string& supervised_host); + + ::grpc::ServerBidiReactor* Import( + ::grpc::CallbackServerContext* context); + + void shutdown(); +}; + +} // namespace com::centreon::agent + +#endif diff --git a/agent/proto/agent.proto b/agent/proto/agent.proto index 0f856feb247..5a9190d2c12 100644 --- a/agent/proto/agent.proto +++ b/agent/proto/agent.proto @@ -24,19 +24,19 @@ import "opentelemetry/proto/collector/metrics/v1/metrics_service.proto"; package com.centreon.agent; // Agent connects to engine -service AgentToEngineService { - rpc Export(AgentToEngine) returns (stream EngineToAgent) {} +service AgentService { + rpc Export(stream MessageFromAgent) returns (stream MessageToAgent) {} } // Engine connects to agent (reversed connection) -service 
EngineToAgentService { - rpc Export(EngineToAgent) returns (stream AgentToEngine) {} +service ReversedAgentService { + rpc Import(stream MessageToAgent) returns (stream MessageFromAgent) {} } //Message sent to agent reversed connection or not -message EngineToAgent { +message MessageToAgent { oneof content { AgentConfiguration config = 1; opentelemetry.proto.collector.metrics.v1.ExportMetricsServiceResponse otel_response = 2; @@ -44,7 +44,7 @@ message EngineToAgent { } //Message sent to Engine reversed connection or not -message AgentToEngine { +message MessageFromAgent { oneof content { AgentInfo init = 1; opentelemetry.proto.collector.metrics.v1.ExportMetricsServiceRequest otel_request = 2; @@ -52,7 +52,7 @@ message AgentToEngine { } //Binary version Engine or Agent -message CentreonVersion { +message Version { uint32 major = 1; uint32 minor = 2; uint32 patch = 3; @@ -61,33 +61,25 @@ message CentreonVersion { //First message sent to engine message AgentInfo { //host name of the computer of the agent - string host_name=1; - CentreonVersion agent_centreon_version=2; - //hosts supervised by agent - repeated string hosts=3; + string host=1; + Version centreon_version=2; } //Agent configuration sent by Engine message AgentConfiguration { - CentreonVersion engine_centreon_version = 1; - //delay between 2 checks of one service, so we will do all check in that period - uint32 second_check_interval = 2; + Version centreon_version = 1; + //delay between 2 checks of one service, so we will do all check in that period (in seconds) + uint32 check_interval = 2; //limit the number of active checks in order to limit charge uint32 max_concurrent_checks = 3; - //period of metric exports - uint32 second_export_period = 4; - //after this timeout, process is killed - uint32 second_check_timeout = 5; - //if true we store nagios other metrics (min max warn crit in Examplar otel objects) - bool use_examplar = 6; - //host list - repeated Host hosts = 7; -} - -//Host (poller configuration 
definition) -message Host { - string host = 1; - repeated Service services = 2; + //period of metric exports (in seconds) + uint32 export_period = 4; + //after this timeout, process is killed (in seconds) + uint32 check_timeout = 5; + //if true we store nagios other metrics (min max warn crit in Exemplar otel objects) + bool use_exemplar = 6; + //list of services with their commands + repeated Service services = 7; } //Service (poller configuration definition) diff --git a/agent/src/bireactor.cc b/agent/src/bireactor.cc new file mode 100644 index 00000000000..bf08196f0ed --- /dev/null +++ b/agent/src/bireactor.cc @@ -0,0 +1,207 @@ +/** + * Copyright 2024 Centreon + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * For more information : contact@centreon.com + */ + +#include "bireactor.hh" + +using namespace com::centreon::agent; + +/** + * @brief when BiReactor::OnDone is called by grpc layers, we should delete + * this. But this object is even used by others. 
+ * So it's stored in this container and just removed from this container when + * OnDone is called + * + * @tparam bireactor_class + */ +template +std::set>> + bireactor::_instances; + +template +std::mutex bireactor::_instances_m; + +template +bireactor::bireactor( + const std::shared_ptr& io_context, + const std::shared_ptr& logger, + const std::string_view& class_name, + const std::string& peer) + : _write_pending(false), + _alive(true), + _class_name(class_name), + _peer(peer), + _io_context(io_context), + _logger(logger) { + SPDLOG_LOGGER_DEBUG(_logger, "create {} this={:p} peer:{}", _class_name, + static_cast(this), _peer); +} + +template +bireactor::~bireactor() { + SPDLOG_LOGGER_DEBUG(_logger, "delete {} this={:p} peer:{}", _class_name, + static_cast(this), _peer); +} + +template +void bireactor::register_stream( + const std::shared_ptr& strm) { + std::lock_guard l(_instances_m); + _instances.insert(strm); +} + +template +void bireactor::start_read() { + std::lock_guard l(_protect); + if (!_alive) { + return; + } + std::shared_ptr to_read; + if (_read_current) { + return; + } + to_read = _read_current = std::make_shared(); + bireactor_class::StartRead(to_read.get()); +} + +template +void bireactor::OnReadDone(bool ok) { + if (ok) { + std::shared_ptr readden; + { + std::lock_guard l(_protect); + SPDLOG_LOGGER_TRACE(_logger, "{:p} {} peer {} receive: {}", + static_cast(this), _class_name, _peer, + _read_current->DebugString()); + readden = _read_current; + _read_current.reset(); + } + start_read(); + if (readden->has_config()) { + on_incomming_request(readden); + } + } else { + SPDLOG_LOGGER_ERROR(_logger, "{:p} {} peer:{} fail read from stream", + static_cast(this), _class_name, _peer); + on_error(); + shutdown(); + } +} + +template +void bireactor::write( + const std::shared_ptr& request) { + { + std::lock_guard l(_protect); + if (!_alive) { + return; + } + _write_queue.push_back(request); + } + start_write(); +} + +template +void bireactor::start_write() 
{ + std::shared_ptr to_send; + { + std::lock_guard l(_protect); + if (!_alive || _write_pending || _write_queue.empty()) { + return; + } + to_send = _write_queue.front(); + _write_pending = true; + } + bireactor_class::StartWrite(to_send.get()); +} + +template +void bireactor::OnWriteDone(bool ok) { + if (ok) { + { + std::lock_guard l(_protect); + _write_pending = false; + SPDLOG_LOGGER_TRACE(_logger, "{:p} {} {} sent", + static_cast(this), _class_name, + (*_write_queue.begin())->DebugString()); + _write_queue.pop_front(); + } + start_write(); + } else { + SPDLOG_LOGGER_ERROR(_logger, "{:p} {} peer {} fail write to stream", + static_cast(this), _class_name, _peer); + on_error(); + shutdown(); + } +} + +template +void bireactor::OnDone() { + /**grpc has a bug, sometimes if we delete this class in this handler as it is + * described in examples, it also deletes used channel and does a pthread_join + * of the current thread witch go to a EDEADLOCK error and call grpc::Crash. + * So we uses asio thread to do the job + */ + _io_context->post([me = std::enable_shared_from_this< + bireactor>::shared_from_this(), + &peer = _peer, logger = _logger]() { + std::lock_guard l(_instances_m); + SPDLOG_LOGGER_DEBUG(logger, "{:p} server::OnDone() to {}", + static_cast(me.get()), peer); + _instances.erase(std::static_pointer_cast>(me)); + }); +} + +template +void bireactor::OnDone(const ::grpc::Status& status) { + /**grpc has a bug, sometimes if we delete this class in this handler as it is + * described in examples, it also deletes used channel and does a + * pthread_join of the current thread witch go to a EDEADLOCK error and call + * grpc::Crash. 
So we uses asio thread to do the job + */ + _io_context->post([me = std::enable_shared_from_this< + bireactor>::shared_from_this(), + status, &peer = _peer, logger = _logger]() { + std::lock_guard l(_instances_m); + if (status.ok()) { + SPDLOG_LOGGER_DEBUG(logger, "{:p} peer: {} client::OnDone({}) {}", + static_cast(me.get()), peer, + status.error_message(), status.error_details()); + } else { + SPDLOG_LOGGER_ERROR(logger, "{:p} peer:{} client::OnDone({}) {}", + static_cast(me.get()), peer, + status.error_message(), status.error_details()); + } + _instances.erase(std::static_pointer_cast>(me)); + }); +} + +template +void bireactor::shutdown() { + SPDLOG_LOGGER_DEBUG(_logger, "{:p} {}::shutdown", static_cast(this), + _class_name); +} + +namespace com::centreon::agent { + +template class bireactor< + ::grpc::ClientBidiReactor>; + +template class bireactor< + ::grpc::ServerBidiReactor>; + +} // namespace com::centreon::agent \ No newline at end of file diff --git a/agent/src/check.cc b/agent/src/check.cc index 0f1917fce66..751ba38d6b1 100644 --- a/agent/src/check.cc +++ b/agent/src/check.cc @@ -20,6 +20,36 @@ using namespace com::centreon::agent; +/** + * @brief Construct a new check::check object + * + * @param io_context + * @param logger + * @param exp + * @param serv + * @param command_name + * @param cmd_line + * @param cnf + * @param handler + */ +check::check(const std::shared_ptr& io_context, + const std::shared_ptr& logger, + time_point exp, + const std::string& serv, + const std::string& command_name, + const std::string& cmd_line, + const engine_to_agent_request_ptr& cnf, + completion_handler&& handler) + : _start_expected(exp), + _service(serv), + _command_name(command_name), + _command_line(cmd_line), + _conf(cnf), + _io_context(io_context), + _logger(logger), + _time_out_timer(*io_context), + _completion_handler(handler) {} + /** * @brief scheduler uses this method to increase start_expected * @@ -36,9 +66,8 @@ void 
check::add_duration_to_start_expected(const duration& to_add) { */ void check::start_check(const duration& timeout) { if (_running_check) { - SPDLOG_LOGGER_ERROR(_logger, - "check for host {}, service {} is already running", - _host, _service); + SPDLOG_LOGGER_ERROR(_logger, "check for service {} is already running", + _service); _io_context->post( [me = shared_from_this(), to_call = _completion_handler]() { to_call(me, 3, std::list(), @@ -48,8 +77,7 @@ void check::start_check(const duration& timeout) { } _running_check = true; _start_timeout_timer(timeout); - SPDLOG_LOGGER_TRACE(_logger, "start check for host {}, service {}", _host, - _service); + SPDLOG_LOGGER_TRACE(_logger, "start check for service {}", _service); } /** @@ -78,8 +106,7 @@ void check::_timeout_timer_handler(const boost::system::error_code& err, return; } if (start_check_index == _running_check_index) { - SPDLOG_LOGGER_ERROR(_logger, "check timeout for host {}, service {}", _host, - _service); + SPDLOG_LOGGER_ERROR(_logger, "check timeout for service {}", _service); on_completion(start_check_index, 3 /*unknown*/, std::list(), {"Timeout at execution of " + _command_line}); @@ -102,8 +129,7 @@ void check::on_completion( const std::list& perfdata, const std::list& outputs) { if (start_check_index == _running_check_index) { - SPDLOG_LOGGER_TRACE(_logger, "end check for host {}, service {}", _host, - _service); + SPDLOG_LOGGER_TRACE(_logger, "end check for service {}", _service); _time_out_timer.cancel(); _running_check = false; ++_running_check_index; diff --git a/agent/src/check_exec.cc b/agent/src/check_exec.cc index 5f1e1393f3c..98e40ae8532 100644 --- a/agent/src/check_exec.cc +++ b/agent/src/check_exec.cc @@ -73,8 +73,8 @@ void detail::process::on_stdout_read(const boost::system::error_code& err, void detail::process::on_stderr_read(const boost::system::error_code& err, size_t nb_read) { if (!err) { - SPDLOG_ERROR("process error: {}", - std::string_view(_stderr_read_buffer, nb_read)); + 
SPDLOG_LOGGER_ERROR(_logger, "process error: {}", + std::string_view(_stderr_read_buffer, nb_read)); } common::process::on_stderr_read(err, nb_read); } @@ -114,6 +114,53 @@ void detail::process::_on_completion() { * check_exec ******************************************************************/ +check_exec::check_exec(const std::shared_ptr& io_context, + const std::shared_ptr& logger, + time_point exp, + const std::string& serv, + const std::string& cmd_name, + const std::string& cmd_line, + const engine_to_agent_request_ptr& cnf, + check::completion_handler&& handler) + : check(io_context, + logger, + exp, + serv, + cmd_name, + cmd_line, + cnf, + std::move(handler)) {} + +/** + * @brief create and initialize a check_exec object (don't use constructor) + * + * @tparam handler_type + * @param io_context + * @param logger + * @param exp start expected + * @param serv + * @param cmd_name + * @param cmd_line + * @param cnf agent configuration + * @param handler completion handler + * @return std::shared_ptr + */ +std::shared_ptr check_exec::load( + const std::shared_ptr& io_context, + const std::shared_ptr& logger, + time_point exp, + const std::string& serv, + const std::string& cmd_name, + const std::string& cmd_line, + const engine_to_agent_request_ptr& cnf, + check::completion_handler&& handler) { + std::shared_ptr ret = + std::make_shared(io_context, logger, exp, serv, cmd_name, + cmd_line, cnf, std::move(handler)); + ret->_init(); + return ret; +} + /** * @brief to call after construction * constructor mustn't be called, use check_exec::load instead @@ -122,11 +169,11 @@ void detail::process::_on_completion() { void check_exec::_init() { try { _process = std::make_shared( - _io_context, spdlog::default_logger(), get_command_line(), + _io_context, _logger, get_command_line(), std::static_pointer_cast(shared_from_this())); } catch (const std::exception& e) { - SPDLOG_ERROR("fail to create process of cmd_line '{}' : {}", - get_command_line(), e.what()); + 
SPDLOG_LOGGER_ERROR(_logger, "fail to create process of cmd_line '{}' : {}", + get_command_line(), e.what()); } } @@ -150,9 +197,8 @@ void check_exec::start_check(const duration& timeout) { try { _process->start(_get_running_check_index()); } catch (const boost::system::system_error& e) { - SPDLOG_LOGGER_ERROR(_logger, "host {}, serv {} fail to execute {}: {}", - get_host(), get_service(), get_command_line(), - e.code().message()); + SPDLOG_LOGGER_ERROR(_logger, " serv {} fail to execute {}: {}", + get_service(), get_command_line(), e.code().message()); _io_context->post([me = check::shared_from_this(), start_check_index = _get_running_check_index(), e]() { me->on_completion( @@ -161,9 +207,8 @@ void check_exec::start_check(const duration& timeout) { e.code().message())}); }); } catch (const std::exception& e) { - SPDLOG_LOGGER_ERROR(_logger, "host {}, serv {} fail to execute {}: {}", - get_host(), get_service(), get_command_line(), - e.what()); + SPDLOG_LOGGER_ERROR(_logger, " serv {} fail to execute {}: {}", + get_service(), get_command_line(), e.what()); _io_context->post([me = check::shared_from_this(), start_check_index = _get_running_check_index(), e]() { me->on_completion(start_check_index, 3, @@ -210,9 +255,10 @@ void check_exec::on_completion(unsigned running_index) { const std::string& first_line = *outputs.begin(); size_t pipe_pos = first_line.find('|'); if (pipe_pos != std::string::npos) { + std::string perfdatas = outputs.begin()->substr(pipe_pos + 1); + boost::trim(perfdatas); perfs = com::centreon::common::perfdata::parse_perfdata( - 0, 0, outputs.begin()->substr(pipe_pos + 1).c_str(), - spdlog::default_logger()); + 0, 0, perfdatas.c_str(), _logger); } } check::on_completion(running_index, _process->get_exit_status(), perfs, diff --git a/agent/src/main.cc b/agent/src/main.cc index 460cae38364..284ef6ec129 100644 --- a/agent/src/main.cc +++ b/agent/src/main.cc @@ -20,7 +20,8 @@ #include #include -#include "scheduler.hh" +#include "streaming_client.hh" 
+#include "streaming_server.hh" namespace po = boost::program_options; @@ -30,6 +31,9 @@ std::shared_ptr g_io_context = std::make_shared(); std::shared_ptr g_logger; +static std::shared_ptr _streaming_client; + +static std::shared_ptr _streaming_server; static void signal_handler(const boost::system::error_code& error, int signal_number) { @@ -40,12 +44,14 @@ static void signal_handler(const boost::system::error_code& error, g_io_context->stop(); break; case SIGUSR1: + SPDLOG_LOGGER_INFO(g_logger, "SIGUSR1 received"); if (g_logger->level()) { g_logger->set_level( static_cast(g_logger->level() - 1)); } break; case SIGUSR2: + SPDLOG_LOGGER_INFO(g_logger, "SIGUSR2 received"); if (g_logger->level() < spdlog::level::off) { g_logger->set_level( static_cast(g_logger->level() + 1)); @@ -55,6 +61,27 @@ static void signal_handler(const boost::system::error_code& error, } } +static std::string read_crypto_file(const char* field, + const po::variables_map& vm) { + if (!vm.count(field)) { + return {}; + } + std::string path = vm[field].as(); + try { + std::ifstream file(path); + if (file.is_open()) { + std::stringstream ss; + ss << file.rdbuf(); + file.close(); + return ss.str(); + } + } catch (const std::exception& e) { + SPDLOG_LOGGER_ERROR(g_logger, "{} fail to read {}: {}", field, path, + e.what()); + } + return ""; +} + int main(int argc, char* argv[]) { po::options_description desc("Allowed options"); desc.add_options()("help,h", "produce help message")( @@ -67,12 +94,16 @@ int main(int argc, char* argv[]) { "encryption", po::value()->default_value(false), "true if encryption")("certificate", po::value(), "path of the certificate file")( - "key", po::value(), "path of the certificate key file")( - "hostname", po::value>(), - "hosts supervised by this agent (if none given we use name of this " + "private_key", po::value(), + "path of the certificate key file")( + "ca_certificate", po::value(), + "path of the certificate authority file")( + "ca_name", po::value(), "hostname 
of the certificate")( + "host", po::value(), + "host supervised by this agent (if none given we use name of this " "host)")("grpc-streaming", po::value()->default_value(true), "this agent connect to engine in streaming mode")( - "reversed-grpc-streaming", po::value()->default_value(true), + "reversed-grpc-streaming", po::value()->default_value(false), "this agent accept connection from engine in streaming mode")( "logger-type", po::value()->default_value("stdout"), "type of logger: stdout, file")( @@ -142,9 +173,50 @@ int main(int argc, char* argv[]) { "centreon-agent start, you can decrease log " "level by kill -USR1 {} or increase by kill -USR2 {}", getpid(), getpid()); + std::shared_ptr conf; + std::string supervised_host; + try { + asio::signal_set signals(*g_io_context, SIGTERM, SIGUSR1, SIGUSR2); + signals.async_wait(signal_handler); + if (!vm.count("endpoint")) { + SPDLOG_CRITICAL( + "endpoint param is mandatory (represents where to connect or where " + "to listen example: 127.0.0.1:4317)"); + return -1; + } + std::string host_port = vm["endpoint"].as(); + std::string ca_name; + if (vm.count("ca_name")) { + ca_name = vm["ca_name"].as(); + } - asio::signal_set signals(*g_io_context, SIGTERM, SIGUSR1, SIGUSR2); - signals.async_wait(signal_handler); + if (vm.count("host")) { + supervised_host = vm["host"].as(); + } + if (supervised_host.empty()) { + supervised_host = boost::asio::ip::host_name(); + } + + conf = std::make_shared( + host_port, vm["encryption"].as(), + read_crypto_file("certificate", vm), + read_crypto_file("private_key", vm), + read_crypto_file("ca_certificate", vm), ca_name, true, 30); + + } catch (const std::exception& e) { + SPDLOG_CRITICAL("fail to parse input params: {}", e.what()); + return -1; + } + + if (vm["grpc-streaming"].as()) { + if (vm["reversed-grpc-streaming"].as()) { + _streaming_server = + streaming_server::load(g_io_context, g_logger, conf, supervised_host); + } else { + _streaming_client = + 
streaming_client::load(g_io_context, g_logger, conf, supervised_host); + } + } try { g_io_context->run(); diff --git a/agent/src/scheduler.cc b/agent/src/scheduler.cc index 9949f3bc207..207ef35721e 100644 --- a/agent/src/scheduler.cc +++ b/agent/src/scheduler.cc @@ -39,7 +39,7 @@ void scheduler::_start() { */ void scheduler::_start_send_timer() { _next_send_time_point += - std::chrono::seconds(_conf->config().second_export_period()); + std::chrono::seconds(_conf->config().export_period()); _send_timer.expires_at(_next_send_time_point); _send_timer.async_wait( [me = shared_from_this()](const boost::system::error_code& err) { @@ -56,7 +56,7 @@ void scheduler::_send_timer_handler(const boost::system::error_code& err) { if (err) { return; } - if (_current_request->resource_metrics_size() > 0) { + if (_current_request->mutable_otel_request()->resource_metrics_size() > 0) { _metric_sender(_current_request); _init_export_request(); } @@ -68,20 +68,20 @@ void scheduler::_send_timer_handler(const boost::system::error_code& err) { * */ void scheduler::_init_export_request() { - _current_request = std::make_shared(); - _host_serv_to_scope_metrics.clear(); + _current_request = std::make_shared(); + _serv_to_scope_metrics.clear(); } /** * @brief create a default empty configuration to scheduler * */ -std::shared_ptr +std::shared_ptr scheduler::default_config() { - std::shared_ptr ret = - std::make_shared(); - ret->mutable_config()->set_second_check_interval(1); - ret->mutable_config()->set_second_export_period(1); + std::shared_ptr ret = + std::make_shared(); + ret->mutable_config()->set_check_interval(1); + ret->mutable_config()->set_export_period(1); ret->mutable_config()->set_max_concurrent_checks(10); return ret; } @@ -142,58 +142,49 @@ void scheduler::_start_waiting_check() { * It initialize check queue and restart all checks schedule * running checks stay alive but their completion will not be handled * We compute start_expected of checks in order to spread checks over - 
* second_check_interval + * check_interval * @param conf */ void scheduler::update(const engine_to_agent_request_ptr& conf) { _check_queue.clear(); _active_check = 0; - size_t nb_check = 0; - for (const auto& hst : conf->config().hosts()) { - nb_check += hst.services().size(); - } + size_t nb_check = conf->config().services().size(); - if (conf->config().second_check_interval() <= 0) { + if (conf->config().check_interval() <= 0) { SPDLOG_LOGGER_ERROR( - _logger, - "second_check_interval cannot be null => no configuration update"); + _logger, "check_interval cannot be null => no configuration update"); return; } SPDLOG_LOGGER_INFO(_logger, "schedule {} checks to execute in {}s", nb_check, - conf->config().second_check_interval()); + conf->config().check_interval()); if (nb_check > 0) { duration check_interval = - std::chrono::microseconds(conf->config().second_check_interval() * - 1000000) / + std::chrono::microseconds(conf->config().check_interval() * 1000000) / nb_check; - time_point next = std::chrono::system_clock::now() + check_interval; - for (const auto& hst : conf->config().hosts()) { - for (const auto& serv : hst.services()) { - if (_logger->level() == spdlog::level::trace) { - SPDLOG_LOGGER_TRACE( - _logger, - "check expected to start at {} for host {} service {} command {}", - next, hst.host(), serv.service_description(), - serv.command_line()); - } else { - SPDLOG_LOGGER_TRACE( - _logger, "check expected to start at {} for host {} service {}", - next, hst.host(), serv.service_description()); - } - _check_queue.emplace(_check_builder( - _io_context, _logger, next, hst.host(), serv.service_description(), - serv.command_name(), serv.command_line(), conf, - [me = shared_from_this()]( - const std::shared_ptr& check, unsigned status, - const std::list& perfdata, - const std::list& outputs) { - me->_check_handler(check, status, perfdata, outputs); - })); - next += check_interval; + time_point next = std::chrono::system_clock::now(); + for (const auto& serv : 
conf->config().services()) { + if (_logger->level() == spdlog::level::trace) { + SPDLOG_LOGGER_TRACE( + _logger, "check expected to start at {} for service {} command {}", + next, serv.service_description(), serv.command_line()); + } else { + SPDLOG_LOGGER_TRACE(_logger, + "check expected to start at {} for service {}", + next, serv.service_description()); } + _check_queue.emplace(_check_builder( + _io_context, _logger, next, serv.service_description(), + serv.command_name(), serv.command_line(), conf, + [me = shared_from_this()]( + const std::shared_ptr& check, unsigned status, + const std::list& perfdata, + const std::list& outputs) { + me->_check_handler(check, status, perfdata, outputs); + })); + next += check_interval; } } @@ -208,15 +199,13 @@ void scheduler::update(const engine_to_agent_request_ptr& conf) { void scheduler::_start_check(const check::pointer& check) { ++_active_check; if (_logger->level() <= spdlog::level::trace) { - SPDLOG_LOGGER_TRACE( - _logger, "start check for host {} service {} command {}", - check->get_host(), check->get_service(), check->get_command_line()); + SPDLOG_LOGGER_TRACE(_logger, "start check for service {} command {}", + check->get_service(), check->get_command_line()); } else { - SPDLOG_LOGGER_DEBUG(_logger, "start check for host {} service {}", - check->get_host(), check->get_service()); + SPDLOG_LOGGER_DEBUG(_logger, "start check for service {}", + check->get_service()); } - check->start_check( - std::chrono::seconds(_conf->config().second_check_timeout())); + check->start_check(std::chrono::seconds(_conf->config().check_timeout())); } /** @@ -233,17 +222,16 @@ void scheduler::_check_handler( unsigned status, const std::list& perfdata, const std::list& outputs) { - SPDLOG_LOGGER_TRACE(_logger, "end check for host {} service {} command {}", - check->get_host(), check->get_service(), - check->get_command_line()); + SPDLOG_LOGGER_TRACE(_logger, "end check for service {} command {}", + check->get_service(), 
check->get_command_line()); // conf has changed => no repush for next check if (check->get_conf() != _conf) { return; } - if (_conf->config().use_examplar()) { - _store_result_in_metrics_and_examplars(check, status, perfdata, outputs); + if (_conf->config().use_exemplar()) { + _store_result_in_metrics_and_exemplars(check, status, perfdata, outputs); } else { _store_result_in_metrics(check, status, perfdata, outputs); } @@ -253,7 +241,7 @@ void scheduler::_check_handler( if (_alive) { // repush for next check check->add_duration_to_start_expected( - std::chrono::seconds(_conf->config().second_check_interval())); + std::chrono::seconds(_conf->config().check_interval())); _check_queue.insert(check); // we have decreased _active_check, so we can launch another check @@ -266,9 +254,11 @@ void scheduler::_check_handler( * */ void scheduler::stop() { - _alive = false; - _send_timer.cancel(); - _check_timer.cancel(); + if (_alive) { + _alive = false; + _send_timer.cancel(); + _check_timer.cancel(); + } } /** @@ -314,13 +304,12 @@ void scheduler::_store_result_in_metrics( * @param perfdata * @param outputs */ -void scheduler::_store_result_in_metrics_and_examplars( +void scheduler::_store_result_in_metrics_and_exemplars( const check::pointer& check, unsigned status, const std::list& perfdata, const std::list& outputs) { - auto& scope_metrics = - _get_scope_metrics(check->get_host(), check->get_service()); + auto& scope_metrics = _get_scope_metrics(check->get_service()); uint64_t now = std::chrono::duration_cast( std::chrono::system_clock::now().time_since_epoch()) .count(); @@ -346,23 +335,21 @@ void scheduler::_store_result_in_metrics_and_examplars( * @brief metrics are grouped by host service * (one resource_metrics by host serv pair) * - * @param host * @param service * @return scheduler::scope_metric_request& */ scheduler::scope_metric_request& scheduler::_get_scope_metrics( - const std::string& host, const std::string& service) { - auto exist = 
_host_serv_to_scope_metrics.find({host, service}); - if (exist != _host_serv_to_scope_metrics.end()) { + auto exist = _serv_to_scope_metrics.find(service); + if (exist != _serv_to_scope_metrics.end()) { return exist->second; } ::opentelemetry::proto::metrics::v1::ResourceMetrics* new_res = - _current_request->add_resource_metrics(); + _current_request->mutable_otel_request()->add_resource_metrics(); auto* host_attrib = new_res->mutable_resource()->add_attributes(); host_attrib->set_key("host.name"); - host_attrib->mutable_value()->set_string_value(host); + host_attrib->mutable_value()->set_string_value(_supervised_host); auto* serv_attrib = new_res->mutable_resource()->add_attributes(); serv_attrib->set_key("service.name"); serv_attrib->mutable_value()->set_string_value(service); @@ -373,9 +360,7 @@ scheduler::scope_metric_request& scheduler::_get_scope_metrics( scope_metric_request to_insert; to_insert.scope_metric = new_scope; - return _host_serv_to_scope_metrics - .emplace(std::make_pair(host, service), to_insert) - .first->second; + return _serv_to_scope_metrics.emplace(service, to_insert).first->second; } /** diff --git a/agent/src/streaming_client.cc b/agent/src/streaming_client.cc new file mode 100644 index 00000000000..cca96413294 --- /dev/null +++ b/agent/src/streaming_client.cc @@ -0,0 +1,229 @@ +/** + * Copyright 2024 Centreon + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + * For more information : contact@centreon.com + */ + +#include "streaming_client.hh" +#include "check_exec.hh" +#include "com/centreon/clib/version.hh" +#include "com/centreon/common/defer.hh" + +using namespace com::centreon::agent; + +/** + * @brief Construct a new client reactor::client reactor object + * + * @param io_context + * @param parent we will keep a weak_ptr on streaming_client object + */ +client_reactor::client_reactor( + const std::shared_ptr& io_context, + const std::shared_ptr& logger, + + const std::shared_ptr& parent, + const std::string& peer) + : bireactor<::grpc::ClientBidiReactor>( + io_context, + logger, + "client", + peer), + _parent(parent) {} + +/** + * @brief pass request to streaming_client parent + * + * @param request + */ +void client_reactor::on_incomming_request( + const std::shared_ptr& request) { + std::shared_ptr parent = _parent.lock(); + if (!parent) { + shutdown(); + } else { + parent->on_incomming_request(shared_from_this(), request); + } +} + +/** + * @brief called whe OnReadDone or OnWriteDone ok parameter is false + * + */ +void client_reactor::on_error() { + std::shared_ptr parent = _parent.lock(); + if (parent) { + parent->on_error(shared_from_this()); + } +} + +/** + * @brief shutdown connection to engine if not yet done + * + */ +void client_reactor::shutdown() { + std::lock_guard l(_protect); + if (_alive) { + _alive = false; + bireactor<::grpc::ClientBidiReactor>::shutdown(); + RemoveHold(); + _context.TryCancel(); + } +} + +/** + * @brief Construct a new streaming client::streaming client object + * not use it, use load instead + * + * @param io_context + * @param conf + * @param supervised_hosts + */ +streaming_client::streaming_client( + const std::shared_ptr& io_context, + const std::shared_ptr& logger, + const std::shared_ptr& conf, + const std::string& supervised_host) + : com::centreon::common::grpc::grpc_client_base(conf, logger), + _io_context(io_context), + _logger(logger), + 
_supervised_host(supervised_host) { + _stub = std::move(AgentService::NewStub(_channel)); +} + +/** + * @brief to call after construction + * + */ +void streaming_client::_start() { + std::weak_ptr weak_this = shared_from_this(); + + _sched = scheduler::load( + _io_context, _logger, _supervised_host, scheduler::default_config(), + [sender = std::move(weak_this)]( + const std::shared_ptr& request) { + auto parent = sender.lock(); + if (parent) { + parent->_send(request); + } + }, + check_exec::load); + _create_reactor(); +} + +/** + * @brief create reactor on current grpc channel + * and send agent infos (hostname, supervised hosts, collect version) + * + */ +void streaming_client::_create_reactor() { + if (_reactor) { + _reactor->shutdown(); + } + _reactor = std::make_shared( + _io_context, _logger, shared_from_this(), get_conf()->get_hostport()); + client_reactor::register_stream(_reactor); + _stub->async()->Export(&_reactor->get_context(), _reactor.get()); + _reactor->start_read(); + _reactor->AddHold(); + _reactor->StartCall(); + + // identifies to engine + std::shared_ptr who_i_am = + std::make_shared(); + auto infos = who_i_am->mutable_init(); + + infos->mutable_centreon_version()->set_major( + com::centreon::clib::version::major); + infos->mutable_centreon_version()->set_minor( + com::centreon::clib::version::minor); + infos->mutable_centreon_version()->set_patch( + com::centreon::clib::version::patch); + + infos->set_host(_supervised_host); + + _reactor->write(who_i_am); +} + +/** + * @brief construct a new streaming_client + * + * @param io_context + * @param conf + * @param supervised_hosts list of host to supervise (match to engine config) + * @return std::shared_ptr + */ +std::shared_ptr streaming_client::load( + const std::shared_ptr& io_context, + const std::shared_ptr& logger, + const std::shared_ptr& conf, + const std::string& supervised_host) { + std::shared_ptr ret = std::make_shared( + io_context, logger, conf, supervised_host); + ret->_start(); + 
return ret; +} + +/** + * @brief send a request to engine + * + * @param request + */ +void streaming_client::_send(const std::shared_ptr& request) { + std::lock_guard l(_protect); + if (_reactor) + _reactor->write(request); +} + +/** + * @brief + * + * @param caller + * @param request + */ +void streaming_client::on_incomming_request( + const std::shared_ptr& caller, + const std::shared_ptr& request) { + // incoming request is used in main thread + _io_context->post([request, sched = _sched]() { sched->update(request); }); +} + +/** + * @brief called by _reactor when something was wrong + * Then we wait 10s to reconnect to engine + * + * @param caller + */ +void streaming_client::on_error(const std::shared_ptr& caller) { + std::lock_guard l(_protect); + if (caller == _reactor) { + _reactor.reset(); + common::defer(_io_context, std::chrono::seconds(10), + [me = shared_from_this()] { me->_create_reactor(); }); + } +} + +/** + * @brief stop and shutdown scheduler and connection + * After, this object is dead and must be deleted + * + */ +void streaming_client::shutdown() { + std::lock_guard l(_protect); + _sched->stop(); + if (_reactor) { + _reactor->shutdown(); + } +} diff --git a/agent/src/streaming_server.cc b/agent/src/streaming_server.cc new file mode 100644 index 00000000000..cfc23fabb11 --- /dev/null +++ b/agent/src/streaming_server.cc @@ -0,0 +1,237 @@ +/** + * Copyright 2024 Centreon + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + * For more information : contact@centreon.com + */ + +#include "streaming_server.hh" +#include "check_exec.hh" +#include "com/centreon/clib/version.hh" +#include "scheduler.hh" + +using namespace com::centreon::agent; + +namespace com::centreon::agent { + +class server_reactor + : public bireactor< + ::grpc::ServerBidiReactor> { + std::shared_ptr _sched; + std::string _supervised_host; + + void _start(); + + public: + server_reactor(const std::shared_ptr& io_context, + const std::shared_ptr& logger, + const std::string& supervised_hosts, + const std::string& peer); + + static std::shared_ptr load( + const std::shared_ptr& io_context, + const std::shared_ptr& logger, + const std::string& supervised_hosts, + const std::string& peer); + + std::shared_ptr shared_from_this() { + return std::static_pointer_cast( + bireactor<::grpc::ServerBidiReactor>:: + shared_from_this()); + } + + void on_incomming_request( + const std::shared_ptr& request) override; + + void on_error() override; + + void shutdown() override; +}; + +server_reactor::server_reactor( + const std::shared_ptr& io_context, + const std::shared_ptr& logger, + const std::string& supervised_host, + const std::string& peer) + : bireactor<::grpc::ServerBidiReactor>( + io_context, + logger, + "server", + peer), + _supervised_host(supervised_host) {} + +void server_reactor::_start() { + std::weak_ptr weak_this(shared_from_this()); + + _sched = scheduler::load( + _io_context, _logger, _supervised_host, scheduler::default_config(), + [sender = std::move(weak_this)]( + const std::shared_ptr& request) { + auto parent = sender.lock(); + if (parent) { + parent->write(request); + } + }, + check_exec::load); + + // identifies to engine + std::shared_ptr who_i_am = + std::make_shared(); + auto infos = who_i_am->mutable_init(); + + infos->mutable_centreon_version()->set_major( + com::centreon::clib::version::major); + infos->mutable_centreon_version()->set_minor( + com::centreon::clib::version::minor); + 
infos->mutable_centreon_version()->set_patch( + com::centreon::clib::version::patch); + infos->set_host(_supervised_host); + + write(who_i_am); +} + +std::shared_ptr server_reactor::load( + const std::shared_ptr& io_context, + const std::shared_ptr& logger, + const std::string& supervised_host, + const std::string& peer) { + std::shared_ptr ret = std::make_shared( + io_context, logger, supervised_host, peer); + ret->_start(); + return ret; +} + +void server_reactor::on_incomming_request( + const std::shared_ptr& request) { + _io_context->post([sched = _sched, request]() { sched->update(request); }); +} + +void server_reactor::on_error() { + shutdown(); +} + +void server_reactor::shutdown() { + std::lock_guard l(_protect); + if (_alive) { + _alive = false; + _sched->stop(); + bireactor<::grpc::ServerBidiReactor>::shutdown(); + Finish(::grpc::Status::CANCELLED); + } +} + +} // namespace com::centreon::agent + +/** + * @brief Construct a new streaming server::streaming server object + * Not use it, use load instead + * @param io_context + * @param conf + * @param supervised_hosts list of supervised hosts that will be sent to engine + * in order to have checks configuration + */ +streaming_server::streaming_server( + const std::shared_ptr& io_context, + const std::shared_ptr& logger, + const std::shared_ptr& conf, + const std::string& supervised_host) + : com::centreon::common::grpc::grpc_server_base(conf, logger), + _io_context(io_context), + _logger(logger), + _supervised_host(supervised_host) { + SPDLOG_LOGGER_INFO(_logger, "create grpc server listening on {}", + conf->get_hostport()); +} + +streaming_server::~streaming_server() { + SPDLOG_LOGGER_INFO(_logger, "delete grpc server listening on {}", + get_conf()->get_hostport()); +} + +/** + * @brief register service and start grpc server + * + */ +void streaming_server::_start() { + ::grpc::Service::MarkMethodCallback( + 0, new ::grpc::internal::CallbackBidiHandler< + ::com::centreon::agent::MessageToAgent, + 
::com::centreon::agent::MessageFromAgent>( + [me = shared_from_this()](::grpc::CallbackServerContext* context) { + return me->Import(context); + })); + + _init([this](::grpc::ServerBuilder& builder) { + builder.RegisterService(this); + }); +} + +/** + * @brief construct and start a new streaming_server + * + * @param io_context + * @param conf + * @param supervised_hosts list of supervised hosts that will be sent to engine + * in order to have checks configuration + * @return std::shared_ptr + */ +std::shared_ptr streaming_server::load( + const std::shared_ptr& io_context, + const std::shared_ptr& logger, + const std::shared_ptr& conf, + const std::string& supervised_host) { + std::shared_ptr ret = std::make_shared( + io_context, logger, conf, supervised_host); + ret->_start(); + return ret; +} + +/** + * @brief shutdown server and incoming connection + * + */ +void streaming_server::shutdown() { + SPDLOG_LOGGER_INFO(_logger, "shutdown grpc server listening on {}", + get_conf()->get_hostport()); + { + std::lock_guard l(_protect); + if (_incoming) { + _incoming->shutdown(); + _incoming.reset(); + } + } + common::grpc::grpc_server_base::shutdown(std::chrono::seconds(10)); +} + +/** + * @brief callback called on incoming connection + * + * @param context + * @return ::grpc::ServerBidiReactor* = + * _incoming + */ +::grpc::ServerBidiReactor* +streaming_server::Import(::grpc::CallbackServerContext* context) { + SPDLOG_LOGGER_INFO(_logger, "incoming connection from {}", context->peer()); + std::lock_guard l(_protect); + if (_incoming) { + _incoming->shutdown(); + } + _incoming = server_reactor::load(_io_context, _logger, _supervised_host, + context->peer()); + server_reactor::register_stream(_incoming); + _incoming->start_read(); + return _incoming.get(); +} diff --git a/agent/test/CMakeLists.txt b/agent/test/CMakeLists.txt index f3825ed76a9..b60ff2f6c73 100644 --- a/agent/test/CMakeLists.txt +++ b/agent/test/CMakeLists.txt @@ -43,7 +43,9 @@ 
target_link_libraries(ut_agent PRIVATE GTest::gtest_main GTest::gmock GTest::gmock_main - -L${PROTOBUF_LIB_DIR} + -L${Boost_LIBRARY_DIR_RELEASE} + boost_program_options + -L${PROTOBUF_LIB_DIR} gRPC::gpr gRPC::grpc gRPC::grpc++ gRPC::grpc++_alts fmt::fmt pthread crypto ssl diff --git a/agent/test/check_exec_test.cc b/agent/test/check_exec_test.cc index 849a19b0da0..48705e84c60 100644 --- a/agent/test/check_exec_test.cc +++ b/agent/test/check_exec_test.cc @@ -24,7 +24,6 @@ using namespace com::centreon::agent; extern std::shared_ptr g_io_context; -static const std::string host("host"); static const std::string serv("serv"); static const std::string cmd_name("command"); static std::string command_line; @@ -35,8 +34,8 @@ TEST(check_exec_test, echo) { std::list outputs; std::condition_variable cond; std::shared_ptr check = check_exec::load( - g_io_context, spdlog::default_logger(), time_point(), host, serv, - cmd_name, command_line, engine_to_agent_request_ptr(), + g_io_context, spdlog::default_logger(), time_point(), serv, cmd_name, + command_line, engine_to_agent_request_ptr(), [&](const std::shared_ptr& caller, int statuss, const std::list& perfdata, @@ -61,8 +60,8 @@ TEST(check_exec_test, timeout) { std::list outputs; std::condition_variable cond; std::shared_ptr check = check_exec::load( - g_io_context, spdlog::default_logger(), time_point(), host, serv, - cmd_name, command_line, engine_to_agent_request_ptr(), + g_io_context, spdlog::default_logger(), time_point(), serv, cmd_name, + command_line, engine_to_agent_request_ptr(), [&](const std::shared_ptr& caller, int statuss, const std::list& perfdata, @@ -87,15 +86,15 @@ TEST(check_exec_test, bad_command) { std::list outputs; std::condition_variable cond; std::shared_ptr check = check_exec::load( - g_io_context, spdlog::default_logger(), time_point(), host, serv, - cmd_name, command_line, engine_to_agent_request_ptr(), + g_io_context, spdlog::default_logger(), time_point(), serv, cmd_name, + command_line, 
engine_to_agent_request_ptr(), [&](const std::shared_ptr& caller, int statuss, const std::list& perfdata, const std::list& output) { status = statuss; outputs = output; - std::this_thread::sleep_for(std::chrono::milliseconds(1)); + std::this_thread::sleep_for(std::chrono::milliseconds(10)); cond.notify_one(); }); check->start_check(std::chrono::seconds(1)); diff --git a/agent/test/check_test.cc b/agent/test/check_test.cc index 4117f7fefb3..1a09b0761cf 100644 --- a/agent/test/check_test.cc +++ b/agent/test/check_test.cc @@ -45,8 +45,7 @@ class dummy_check : public check { } template - dummy_check(const std::string& host, - const std::string& serv, + dummy_check(const std::string& serv, const std::string& command_name, const std::string& command_line, const duration& command_duration, @@ -54,7 +53,6 @@ class dummy_check : public check { : check(g_io_context, spdlog::default_logger(), std::chrono::system_clock::now(), - host, serv, command_name, command_line, @@ -64,7 +62,6 @@ class dummy_check : public check { _command_timer(*g_io_context) {} }; -static std::string hst("my_host"); static std::string serv("my_serv"); static std::string cmd_name("my_command_name"); static std::string cmd_line("my_command_line"); @@ -77,7 +74,7 @@ TEST(check_test, timeout) { unsigned handler_call_cpt = 0; std::shared_ptr checker = std::make_shared( - hst, serv, cmd_name, cmd_line, std::chrono::milliseconds(500), + serv, cmd_name, cmd_line, std::chrono::milliseconds(500), [&status, &output, &handler_call_cpt, &cond]( const std::shared_ptr&, unsigned statuss, const std::list& perfdata, @@ -114,7 +111,7 @@ TEST(check_test, no_timeout) { unsigned handler_call_cpt = 0; std::shared_ptr checker = std::make_shared( - hst, serv, cmd_name, cmd_line, std::chrono::milliseconds(100), + serv, cmd_name, cmd_line, std::chrono::milliseconds(100), [&status, &output, &handler_call_cpt, &cond]( const std::shared_ptr&, unsigned statuss, const std::list& perfdata, diff --git a/agent/test/scheduler_test.cc 
b/agent/test/scheduler_test.cc index 4095adf7da3..653edd9ef80 100644 --- a/agent/test/scheduler_test.cc +++ b/agent/test/scheduler_test.cc @@ -37,7 +37,6 @@ class tempo_check : public check { tempo_check(const std::shared_ptr& io_context, const std::shared_ptr& logger, time_point exp, - const std::string& hst, const std::string& serv, const std::string& cmd_name, const std::string& cmd_line, @@ -48,12 +47,11 @@ class tempo_check : public check { : check(io_context, logger, exp, - hst, serv, cmd_name, cmd_line, cnf, - handler), + std::move(handler)), _completion_timer(*io_context), _command_exit_status(command_exit_status), _completion_delay(completion_delay) {} @@ -69,8 +67,7 @@ class tempo_check : public check { check_running_index = _get_running_check_index()]( const boost::system::error_code& err) { - SPDLOG_TRACE("end of completion timer for host {}, serv {}", get_host(), - get_service()); + SPDLOG_TRACE("end of completion timer for serv {}", get_service()); me->on_completion( check_running_index, _command_exit_status, com::centreon::common::perfdata::parse_perfdata( @@ -96,53 +93,46 @@ class scheduler_test : public ::testing::Test { spdlog::default_logger()->set_level(spdlog::level::trace); } - std::shared_ptr create_conf( - unsigned nb_host, - unsigned nb_serv_by_host, + std::shared_ptr create_conf( + unsigned nb_serv, unsigned second_check_period, - unsigned second_export_period, + unsigned export_period, unsigned max_concurent_check, - unsigned second_check_timeout); + unsigned check_timeout); }; -std::shared_ptr -scheduler_test::create_conf(unsigned nb_host, - unsigned nb_serv_by_host, +std::shared_ptr +scheduler_test::create_conf(unsigned nb_serv, unsigned second_check_period, - unsigned second_export_period, + unsigned export_period, unsigned max_concurent_check, - unsigned second_check_timeout) { - std::shared_ptr conf = - std::make_shared(); + unsigned check_timeout) { + std::shared_ptr conf = + std::make_shared(); auto cnf = conf->mutable_config(); - 
cnf->set_second_check_interval(second_check_period); - cnf->set_second_export_period(second_export_period); + cnf->set_check_interval(second_check_period); + cnf->set_export_period(export_period); cnf->set_max_concurrent_checks(max_concurent_check); - cnf->set_second_check_timeout(second_check_timeout); - cnf->set_use_examplar(true); - for (; nb_host; --nb_host) { - auto hst = cnf->add_hosts(); - hst->set_host(fmt::format("host{}", nb_host)); - for (unsigned serv_index = 0; serv_index < nb_serv_by_host; ++serv_index) { - auto serv = hst->add_services(); - serv->set_service_description( - fmt::format("serv{}", nb_host * nb_serv_by_host + serv_index)); - serv->set_command_name( - fmt::format("command{}", nb_host * nb_serv_by_host + serv_index)); - serv->set_command_line("/usr/bin/ls"); - } + cnf->set_check_timeout(check_timeout); + cnf->set_use_exemplar(true); + for (unsigned serv_index = 0; serv_index < nb_serv; ++serv_index) { + auto serv = cnf->add_services(); + serv->set_service_description(fmt::format("serv{}", serv_index + 1)); + serv->set_command_name(fmt::format("command{}", serv_index + 1)); + serv->set_command_line("/usr/bin/ls"); } return conf; } TEST_F(scheduler_test, no_config) { std::shared_ptr sched = scheduler::load( - g_io_context, spdlog::default_logger(), scheduler::default_config(), - [](const export_metric_request_ptr&) {}, + g_io_context, spdlog::default_logger(), "my_host", + scheduler::default_config(), + [](const std::shared_ptr&) {}, [](const std::shared_ptr&, const std::shared_ptr&, time_point /* start expected*/, - const std::string& /*host*/, const std::string& /*service*/, - const std::string& /*cmd_name*/, const std::string& /*cmd_line*/, + const std::string& /*service*/, const std::string& /*cmd_name*/, + const std::string& /*cmd_line*/, const engine_to_agent_request_ptr& /*engine to agent request*/, check::completion_handler&&) { return std::shared_ptr(); }); @@ -160,18 +150,18 @@ TEST_F(scheduler_test, no_config) { 
TEST_F(scheduler_test, correct_schedule) { std::shared_ptr sched = scheduler::load( - g_io_context, spdlog::default_logger(), create_conf(2, 10, 1, 1, 50, 1), - [](const export_metric_request_ptr&) {}, + g_io_context, spdlog::default_logger(), "my_host", + create_conf(20, 1, 1, 50, 1), + [](const std::shared_ptr&) {}, [](const std::shared_ptr& io_context, const std::shared_ptr& logger, - time_point start_expected, const std::string& host, - const std::string& service, const std::string& cmd_name, - const std::string& cmd_line, + time_point start_expected, const std::string& service, + const std::string& cmd_name, const std::string& cmd_line, const engine_to_agent_request_ptr& engine_to_agent_request, check::completion_handler&& handler) { return std::make_shared( - io_context, logger, start_expected, host, service, cmd_name, - cmd_line, engine_to_agent_request, 0, std::chrono::milliseconds(50), + io_context, logger, start_expected, service, cmd_name, cmd_line, + engine_to_agent_request, 0, std::chrono::milliseconds(50), std::move(handler)); }); @@ -229,41 +219,42 @@ TEST_F(scheduler_test, correct_schedule) { } TEST_F(scheduler_test, time_out) { - export_metric_request_ptr exported_request; + std::shared_ptr exported_request; std::condition_variable export_cond; - time_point now = std::chrono::system_clock::now(); + uint64_t expected_completion_time = + std::chrono::duration_cast( + std::chrono::system_clock::now().time_since_epoch()) + .count(); std::shared_ptr sched = scheduler::load( - g_io_context, spdlog::default_logger(), create_conf(1, 1, 1, 1, 1, 1), - [&](const export_metric_request_ptr& req) { + g_io_context, spdlog::default_logger(), "my_host", + create_conf(1, 1, 1, 1, 1), + [&](const std::shared_ptr& req) { exported_request = req; export_cond.notify_all(); }, [](const std::shared_ptr& io_context, const std::shared_ptr& logger, - time_point start_expected, const std::string& host, - const std::string& service, const std::string& cmd_name, - const 
std::string& cmd_line, + time_point start_expected, const std::string& service, + const std::string& cmd_name, const std::string& cmd_line, const engine_to_agent_request_ptr& engine_to_agent_request, check::completion_handler&& handler) { return std::make_shared( - io_context, logger, start_expected, host, service, cmd_name, - cmd_line, engine_to_agent_request, 0, - std::chrono::milliseconds(1500), std::move(handler)); + io_context, logger, start_expected, service, cmd_name, cmd_line, + engine_to_agent_request, 0, std::chrono::milliseconds(1500), + std::move(handler)); }); std::mutex m; std::unique_lock l(m); export_cond.wait(l); - uint64_t completion_time = tempo_check::completion_time; - ASSERT_TRUE(exported_request); - ASSERT_EQ(exported_request->resource_metrics_size(), 1); + ASSERT_EQ(exported_request->otel_request().resource_metrics_size(), 1); const ::opentelemetry::proto::metrics::v1::ResourceMetrics& res = - exported_request->resource_metrics()[0]; + exported_request->otel_request().resource_metrics()[0]; const auto& res_attrib = res.resource().attributes(); ASSERT_EQ(res_attrib.size(), 2); ASSERT_EQ(res_attrib.at(0).key(), "host.name"); - ASSERT_EQ(res_attrib.at(0).value().string_value(), "host1"); + ASSERT_EQ(res_attrib.at(0).value().string_value(), "my_host"); ASSERT_EQ(res_attrib.at(1).key(), "service.name"); ASSERT_EQ(res_attrib.at(1).value().string_value(), "serv1"); ASSERT_EQ(res.scope_metrics_size(), 1); @@ -277,32 +268,33 @@ TEST_F(scheduler_test, time_out) { ASSERT_EQ(metric.gauge().data_points_size(), 1); const auto& data_point = metric.gauge().data_points()[0]; ASSERT_EQ(data_point.as_int(), 3); - ASSERT_LE(data_point.time_unix_nano(), completion_time); - ASSERT_GE(data_point.time_unix_nano(), completion_time - 100000000); + // one second first check timer expire + timeout => 2s + ASSERT_GE(data_point.time_unix_nano(), expected_completion_time + 2000000000); + ASSERT_LE(data_point.time_unix_nano(), expected_completion_time + 2500000000); 
sched->stop(); } TEST_F(scheduler_test, correct_output_examplar) { - export_metric_request_ptr exported_request; + std::shared_ptr exported_request; std::condition_variable export_cond; time_point now = std::chrono::system_clock::now(); std::shared_ptr sched = scheduler::load( - g_io_context, spdlog::default_logger(), create_conf(1, 2, 1, 2, 10, 1), - [&](const export_metric_request_ptr& req) { + g_io_context, spdlog::default_logger(), "my_host", + create_conf(2, 1, 2, 10, 1), + [&](const std::shared_ptr& req) { exported_request = req; export_cond.notify_all(); }, [](const std::shared_ptr& io_context, const std::shared_ptr& logger, - time_point start_expected, const std::string& host, - const std::string& service, const std::string& cmd_name, - const std::string& cmd_line, + time_point start_expected, const std::string& service, + const std::string& cmd_name, const std::string& cmd_line, const engine_to_agent_request_ptr& engine_to_agent_request, check::completion_handler&& handler) { return std::make_shared( - io_context, logger, start_expected, host, service, cmd_name, - cmd_line, engine_to_agent_request, 0, std::chrono::milliseconds(10), + io_context, logger, start_expected, service, cmd_name, cmd_line, + engine_to_agent_request, 0, std::chrono::milliseconds(10), std::move(handler)); }); std::mutex m; @@ -311,17 +303,17 @@ TEST_F(scheduler_test, correct_output_examplar) { ASSERT_TRUE(exported_request); - SPDLOG_INFO("export:{}", exported_request->DebugString()); + SPDLOG_INFO("export:{}", exported_request->otel_request().DebugString()); - ASSERT_EQ(exported_request->resource_metrics_size(), 2); + ASSERT_EQ(exported_request->otel_request().resource_metrics_size(), 2); const ::opentelemetry::proto::metrics::v1::ResourceMetrics& res = - exported_request->resource_metrics()[0]; + exported_request->otel_request().resource_metrics()[0]; const auto& res_attrib = res.resource().attributes(); ASSERT_EQ(res_attrib.size(), 2); ASSERT_EQ(res_attrib.at(0).key(), 
"host.name"); - ASSERT_EQ(res_attrib.at(0).value().string_value(), "host1"); + ASSERT_EQ(res_attrib.at(0).value().string_value(), "my_host"); ASSERT_EQ(res_attrib.at(1).key(), "service.name"); - ASSERT_EQ(res_attrib.at(1).value().string_value(), "serv2"); + ASSERT_EQ(res_attrib.at(1).value().string_value(), "serv1"); ASSERT_EQ(res.scope_metrics_size(), 1); const ::opentelemetry::proto::metrics::v1::ScopeMetrics& scope_metrics = res.scope_metrics()[0]; @@ -336,13 +328,13 @@ TEST_F(scheduler_test, correct_output_examplar) { uint64_t first_time_point = data_point_state.time_unix_nano(); const ::opentelemetry::proto::metrics::v1::ResourceMetrics& res2 = - exported_request->resource_metrics()[1]; + exported_request->otel_request().resource_metrics()[1]; const auto& res_attrib2 = res2.resource().attributes(); ASSERT_EQ(res_attrib2.size(), 2); ASSERT_EQ(res_attrib2.at(0).key(), "host.name"); - ASSERT_EQ(res_attrib2.at(0).value().string_value(), "host1"); + ASSERT_EQ(res_attrib2.at(0).value().string_value(), "my_host"); ASSERT_EQ(res_attrib2.at(1).key(), "service.name"); - ASSERT_EQ(res_attrib2.at(1).value().string_value(), "serv3"); + ASSERT_EQ(res_attrib2.at(1).value().string_value(), "serv2"); ASSERT_EQ(res2.scope_metrics_size(), 1); const ::opentelemetry::proto::metrics::v1::ScopeMetrics& scope_metrics2 = @@ -375,7 +367,6 @@ class concurent_check : public check { concurent_check(const std::shared_ptr& io_context, const std::shared_ptr& logger, time_point exp, - const std::string& hst, const std::string& serv, const std::string& cmd_name, const std::string& cmd_line, @@ -386,12 +377,11 @@ class concurent_check : public check { : check(io_context, logger, exp, - hst, serv, cmd_name, cmd_line, cnf, - handler), + std::move(handler)), _completion_timer(*io_context), _command_exit_status(command_exit_status), _completion_delay(completion_delay) {} @@ -409,8 +399,7 @@ class concurent_check : public check { const boost::system::error_code& err) { active_checks.erase(this); 
checked.insert(this); - SPDLOG_TRACE("end of completion timer for host {}, serv {}", get_host(), - get_service()); + SPDLOG_TRACE("end of completion timer for serv {}", get_service()); me->on_completion( check_running_index, _command_exit_status, com::centreon::common::perfdata::parse_perfdata( @@ -429,18 +418,18 @@ unsigned concurent_check::max_active_check; TEST_F(scheduler_test, max_concurent) { std::shared_ptr sched = scheduler::load( - g_io_context, spdlog::default_logger(), create_conf(10, 20, 1, 1, 10, 1), - [&](const export_metric_request_ptr& req) {}, + g_io_context, spdlog::default_logger(), "my_host", + create_conf(200, 1, 1, 10, 1), + [&](const std::shared_ptr& req) {}, [](const std::shared_ptr& io_context, const std::shared_ptr& logger, - time_point start_expected, const std::string& host, - const std::string& service, const std::string& cmd_name, - const std::string& cmd_line, + time_point start_expected, const std::string& service, + const std::string& cmd_name, const std::string& cmd_line, const engine_to_agent_request_ptr& engine_to_agent_request, check::completion_handler&& handler) { return std::make_shared( - io_context, logger, start_expected, host, service, cmd_name, - cmd_line, engine_to_agent_request, 0, std::chrono::milliseconds(75), + io_context, logger, start_expected, service, cmd_name, cmd_line, + engine_to_agent_request, 0, std::chrono::milliseconds(75), std::move(handler)); }); diff --git a/broker/CMakeLists.txt b/broker/CMakeLists.txt index ad2373471fe..1e4aefc5057 100644 --- a/broker/CMakeLists.txt +++ b/broker/CMakeLists.txt @@ -484,6 +484,8 @@ target_link_libraries( "-Wl,--no-whole-archive" nlohmann_json::nlohmann_json fmt::fmt + -L${Boost_LIBRARY_DIR_RELEASE} + boost_program_options -L${PROTOBUF_LIB_DIR} "-Wl,--whole-archive" gRPC::grpc++ diff --git a/common/grpc/inc/com/centreon/common/grpc/grpc_config.hh b/common/grpc/inc/com/centreon/common/grpc/grpc_config.hh index 2d8b5978be9..4d151fa0baa 100644 --- 
a/common/grpc/inc/com/centreon/common/grpc/grpc_config.hh +++ b/common/grpc/inc/com/centreon/common/grpc/grpc_config.hh @@ -101,6 +101,37 @@ class grpc_config { _compress == right._compress && _second_keepalive_interval == right._second_keepalive_interval; } + + /** + * @brief field by field three-way comparison, same return convention as std::string::compare + * + * @param right + * @return int negative if this is ordered before right, 0 if equal, positive otherwise (not limited to -1 and 1) + */ + int compare(const grpc_config& right) const { + int ret = _hostport.compare(right._hostport); + if (ret) + return ret; + ret = _crypted - right._crypted; + if (ret) + return ret; + ret = _certificate.compare(right._certificate); + if (ret) + return ret; + ret = _cert_key.compare(right._cert_key); + if (ret) + return ret; + ret = _ca_cert.compare(right._ca_cert); + if (ret) + return ret; + ret = _ca_name.compare(right._ca_name); + if (ret) + return ret; + ret = _compress - right._compress; + if (ret) + return ret; + return _second_keepalive_interval - right._second_keepalive_interval; + } }; } // namespace com::centreon::common::grpc diff --git a/common/inc/com/centreon/common/process.hh b/common/inc/com/centreon/common/process.hh index 74562bfc18b..caca6a1dbc9 100644 --- a/common/inc/com/centreon/common/process.hh +++ b/common/inc/com/centreon/common/process.hh @@ -38,9 +38,6 @@ struct boost_process; * locked */ class process : public std::enable_shared_from_this { - std::shared_ptr _io_context; - std::shared_ptr _logger; - std::string _exe_path; std::vector _args; @@ -62,6 +59,9 @@ class process : public std::enable_shared_from_this { void stderr_read(); protected: + std::shared_ptr _io_context; + std::shared_ptr _logger; + char _stdout_read_buffer[0x1000] ABSL_GUARDED_BY(_protect); char _stderr_read_buffer[0x1000] ABSL_GUARDED_BY(_protect); @@ -132,10 +132,10 @@ process::process(const std::shared_ptr& io_context, const std::string_view& exe_path, string_iterator arg_begin, string_iterator arg_end) - : _io_context(io_context), - _logger(logger), - _exe_path(exe_path), - _args(arg_begin, arg_end) 
{} + : _exe_path(exe_path), + _args(arg_begin, arg_end), + _io_context(io_context), + _logger(logger) {} /** * @brief Construct a new process::process object @@ -151,10 +151,10 @@ process::process(const std::shared_ptr& io_context, const std::shared_ptr& logger, const std::string_view& exe_path, const args_container& args) - : _io_context(io_context), - _logger(logger), - _exe_path(exe_path), - _args(args) {} + : _exe_path(exe_path), + _args(args), + _io_context(io_context), + _logger(logger) {} /** * @brief Construct a new process::process object @@ -171,7 +171,7 @@ process::process(const std::shared_ptr& io_context, const std::shared_ptr& logger, const std::string_view& exe_path, const std::initializer_list& args) - : _io_context(io_context), _logger(logger), _exe_path(exe_path) { + : _exe_path(exe_path), _io_context(io_context), _logger(logger) { _args.reserve(args.size()); for (const auto& str : args) { _args.emplace_back(str); diff --git a/common/src/perfdata.cc b/common/src/perfdata.cc index 43efaed8f36..fab01b147f5 100644 --- a/common/src/perfdata.cc +++ b/common/src/perfdata.cc @@ -286,10 +286,6 @@ std::list perfdata::parse_perfdata( error = true; } - if (p._name == "bar") { - int ii = 1; - } - // Check format. 
if (*tmp != '=') { int i; diff --git a/common/src/process.cc b/common/src/process.cc index c66e1cbcc21..9e0282b38fb 100644 --- a/common/src/process.cc +++ b/common/src/process.cc @@ -19,6 +19,7 @@ #include #include +#include #include "process.hh" @@ -63,8 +64,7 @@ process::process(const std::shared_ptr& io_context, const std::shared_ptr& logger, const std::string_view& cmd_line) : _io_context(io_context), _logger(logger) { - auto split_res = - absl::StrSplit(cmd_line, absl::ByAnyChar(" \t"), absl::SkipEmpty()); + auto split_res = boost::program_options::split_unix(std::string(cmd_line)); if (split_res.begin() == split_res.end()) { SPDLOG_LOGGER_ERROR(_logger, "empty command line:\"{}\"", cmd_line); throw exceptions::msg_fmt("empty command line:\"{}\"", cmd_line); diff --git a/common/tests/CMakeLists.txt b/common/tests/CMakeLists.txt index 1bd982a7743..0c3a0c30d16 100644 --- a/common/tests/CMakeLists.txt +++ b/common/tests/CMakeLists.txt @@ -50,6 +50,8 @@ target_link_libraries( ut_common PRIVATE centreon_common centreon_http + -L${Boost_LIBRARY_DIR_RELEASE} + boost_program_options re2::re2 log_v2 crypto diff --git a/engine/modules/opentelemetry/CMakeLists.txt b/engine/modules/opentelemetry/CMakeLists.txt index 772150afa4b..bb13dac0417 100644 --- a/engine/modules/opentelemetry/CMakeLists.txt +++ b/engine/modules/opentelemetry/CMakeLists.txt @@ -64,7 +64,12 @@ add_custom_command( add_library(opentelemetry SHARED ${SRC_DIR}/centreon_agent/agent.grpc.pb.cc ${SRC_DIR}/centreon_agent/agent.pb.cc +${SRC_DIR}/centreon_agent/agent_check_result_builder.cc +${SRC_DIR}/centreon_agent/agent_config.cc ${SRC_DIR}/centreon_agent/agent_impl.cc +${SRC_DIR}/centreon_agent/agent_reverse_client.cc +${SRC_DIR}/centreon_agent/agent_service.cc +${SRC_DIR}/centreon_agent/to_agent_connector.cc ${SRC_DIR}/data_point_fifo.cc ${SRC_DIR}/data_point_fifo_container.cc ${SRC_DIR}/grpc_config.cc diff --git 
a/engine/modules/opentelemetry/inc/com/centreon/engine/modules/opentelemetry/centreon_agent/agent_check_result_builder.hh b/engine/modules/opentelemetry/inc/com/centreon/engine/modules/opentelemetry/centreon_agent/agent_check_result_builder.hh new file mode 100644 index 00000000000..f115d67befb --- /dev/null +++ b/engine/modules/opentelemetry/inc/com/centreon/engine/modules/opentelemetry/centreon_agent/agent_check_result_builder.hh @@ -0,0 +1,117 @@ +/** + * Copyright 2024 Centreon + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + * For more information : contact@centreon.com + */ + +#ifndef CCE_MOD_OTL_AGENT_CHECK_RESULT_BUILDER_HH +#define CCE_MOD_OTL_AGENT_CHECK_RESULT_BUILDER_HH + +namespace com::centreon::engine::modules::opentelemetry::centreon_agent { + +/** + * @brief in order to save network usage, the agent stores metric infos (thresholds, min, max) in exemplars + * An example of protobuf data: + * @code {.json} + { + "name": "metric2", + "unit": "ms", + "gauge": { + "dataPoints": [ + { + "timeUnixNano": "1718345061381922153", + "exemplars": [ + { + "asDouble": 80, + "filteredAttributes": [ + { + "key": "crit_gt" + } + ] + }, + { + "asDouble": 75, + "filteredAttributes": [ + { + "key": "crit_lt" + } + ] + }, + { + "asDouble": 75, + "filteredAttributes": [ + { + "key": "warn_gt" + } + ] + }, + { + "asDouble": 50, + "filteredAttributes": [ + { + "key": "warn_lt" + } + ] + }, + { + "asDouble": 0, + "filteredAttributes": [ + { + "key": "min" + } + ] + }, + { + "asDouble": 100, + "filteredAttributes": [ + { + "key": "max" + } + ] + } + ], + "asInt": "30" + } + ] + } + * @endcode + * + * + */ +class agent_check_result_builder : public otl_check_result_builder { + protected: + bool _build_result_from_metrics(metric_name_to_fifo& fifos, + commands::result& res) override; + + public: + agent_check_result_builder(const std::string& cmd_line, + uint64_t command_id, + const host& host, + const service* service, + std::chrono::system_clock::time_point timeout, + commands::otel::result_callback&& handler, + const std::shared_ptr& logger) + : otl_check_result_builder(cmd_line, + command_id, + host, + service, + timeout, + std::move(handler), + logger) {} +}; + +} // namespace com::centreon::engine::modules::opentelemetry::centreon_agent + +#endif \ No newline at end of file diff --git a/engine/modules/opentelemetry/inc/com/centreon/engine/modules/opentelemetry/centreon_agent/agent_config.hh b/engine/modules/opentelemetry/inc/com/centreon/engine/modules/opentelemetry/centreon_agent/agent_config.hh new file mode 100644 
index 00000000000..236f5620798 --- /dev/null +++ b/engine/modules/opentelemetry/inc/com/centreon/engine/modules/opentelemetry/centreon_agent/agent_config.hh @@ -0,0 +1,77 @@ +/* + * Copyright 2024 Centreon + * + * This file is part of Centreon Engine. + * + * Centreon Engine is free software: you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. + * + * Centreon Engine is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Centreon Engine. If not, see + * <http://www.gnu.org/licenses/>. + */ + +#ifndef CCE_MOD_OTL_CENTREON_AGENT_AGENT_CONFIG_HH +#define CCE_MOD_OTL_CENTREON_AGENT_AGENT_CONFIG_HH + +#include "com/centreon/engine/modules/opentelemetry/grpc_config.hh" + +namespace com::centreon::engine::modules::opentelemetry::centreon_agent { + +class agent_config { + public: + using grpc_config_set = + absl::btree_set; + + using pointer = std::shared_ptr; + + private: + // all endpoints the engine has to connect to (reverse connection) + grpc_config_set _agent_grpc_reverse_conf; + // delay between two checks of the same service, so all checks are done within that + // period (in seconds) + uint32_t _check_interval; + // limit the number of active checks in order to limit load + uint32_t _max_concurrent_checks; + // period of metric exports (in seconds) + uint32_t _export_period; + // after this timeout, the check process is killed (in seconds) + uint32_t _check_timeout; + + public: + agent_config(const rapidjson::Value& json_config_v); + + // used for tests + agent_config(uint32_t check_interval, + uint32_t max_concurrent_checks, + uint32_t export_period, + uint32_t check_timeout); + + agent_config(uint32_t check_interval, + uint32_t max_concurrent_checks, + uint32_t 
export_period, + uint32_t check_timeout, + const std::initializer_list& endpoints); + + const grpc_config_set& get_agent_grpc_reverse_conf() const { + return _agent_grpc_reverse_conf; + } + + uint32_t get_check_interval() const { return _check_interval; } + uint32_t get_max_concurrent_checks() const { return _max_concurrent_checks; } + uint32_t get_export_period() const { return _export_period; } + uint32_t get_check_timeout() const { return _check_timeout; } + + bool operator==(const agent_config& right) const; + + bool operator!=(const agent_config& right) const { return !(*this == right); } +}; + +}; // namespace com::centreon::engine::modules::opentelemetry::centreon_agent +#endif diff --git a/engine/modules/opentelemetry/inc/com/centreon/engine/modules/opentelemetry/centreon_agent/agent_impl.hh b/engine/modules/opentelemetry/inc/com/centreon/engine/modules/opentelemetry/centreon_agent/agent_impl.hh index 15ece50b4b0..b654046d4e5 100644 --- a/engine/modules/opentelemetry/inc/com/centreon/engine/modules/opentelemetry/centreon_agent/agent_impl.hh +++ b/engine/modules/opentelemetry/inc/com/centreon/engine/modules/opentelemetry/centreon_agent/agent_impl.hh @@ -21,9 +21,89 @@ #define CCE_MOD_OTL_CENTREON_AGENT_AGENT_IMPL_HH #include "centreon_agent/agent.grpc.pb.h" +#include "com/centreon/engine/modules/opentelemetry/centreon_agent/agent_config.hh" +#include "com/centreon/engine/modules/opentelemetry/otl_data_point.hh" namespace com::centreon::engine::modules::opentelemetry::centreon_agent { +template +class agent_impl + : public bireactor_class, + public std::enable_shared_from_this> { + std::shared_ptr _io_context; + const std::string_view _class_name; + + agent_config::pointer _conf ABSL_GUARDED_BY(_protect); + + metric_handler _metric_handler; + + std::shared_ptr _agent_info + ABSL_GUARDED_BY(_protect); + std::shared_ptr _last_sent_config + ABSL_GUARDED_BY(_protect); + + static std::set> _instances + ABSL_GUARDED_BY(_instances_m); + static absl::Mutex 
_instances_m; + + bool _write_pending; + std::deque> _write_queue + ABSL_GUARDED_BY(_protect); + std::shared_ptr _read_current + ABSL_GUARDED_BY(_protect); + + void _calc_and_send_config_if_needed(); + + virtual const std::string& get_peer() const = 0; + + void _write(const std::shared_ptr& request); + + protected: + std::shared_ptr _logger; + bool _alive ABSL_GUARDED_BY(_protect); + mutable absl::Mutex _protect; + + public: + agent_impl(const std::shared_ptr& io_context, + const std::string_view class_name, + const agent_config::pointer& conf, + const metric_handler& handler, + const std::shared_ptr& logger); + + virtual ~agent_impl(); + + void calc_and_send_config_if_needed(const agent_config::pointer& new_conf); + + static void all_agent_calc_and_send_config_if_needed( + const agent_config::pointer& new_conf); + + static void update_config(); + + void on_request(const std::shared_ptr& request); + + static void register_stream(const std::shared_ptr& strm); + + void start_read(); + + void start_write(); + + // bireactor part + void OnReadDone(bool ok) override; + + virtual void on_error() = 0; + + void OnWriteDone(bool ok) override; + + // server version + void OnDone(); + // client version + void OnDone(const ::grpc::Status& /*s*/); + + virtual void shutdown(); + + static void shutdown_all(); +}; + } // namespace com::centreon::engine::modules::opentelemetry::centreon_agent #endif diff --git a/engine/modules/opentelemetry/inc/com/centreon/engine/modules/opentelemetry/centreon_agent/agent_reverse_client.hh b/engine/modules/opentelemetry/inc/com/centreon/engine/modules/opentelemetry/centreon_agent/agent_reverse_client.hh new file mode 100644 index 00000000000..05e21772408 --- /dev/null +++ b/engine/modules/opentelemetry/inc/com/centreon/engine/modules/opentelemetry/centreon_agent/agent_reverse_client.hh @@ -0,0 +1,63 @@ +/* + * Copyright 2024 Centreon + * + * This file is part of Centreon Engine. 
+ * + * Centreon Engine is free software: you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. + * + * Centreon Engine is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Centreon Engine. If not, see + * . + */ + +#ifndef CCE_MOD_OTL_CENTREON_AGENT_AGENT_REVERSE_CLIENT_HH +#define CCE_MOD_OTL_CENTREON_AGENT_AGENT_REVERSE_CLIENT_HH + +#include "com/centreon/engine/modules/opentelemetry/centreon_agent/agent_config.hh" +#include "com/centreon/engine/modules/opentelemetry/otl_data_point.hh" + +namespace com::centreon::engine::modules::opentelemetry::centreon_agent { + +class to_agent_connector; + +class agent_reverse_client { + protected: + std::shared_ptr _io_context; + agent_config::pointer _conf; + const metric_handler _metric_handler; + std::shared_ptr _logger; + + using config_to_client = absl::btree_map, + grpc_config_compare>; + absl::Mutex _agents_m; + config_to_client _agents ABSL_GUARDED_BY(_agents_m); + + virtual config_to_client::iterator _create_new_client_connection( + const grpc_config::pointer& agent_endpoint, + const agent_config::pointer& agent_conf) + ABSL_EXCLUSIVE_LOCKS_REQUIRED(_agents_m); + + virtual void _shutdown_connection(config_to_client::const_iterator to_delete); + + public: + agent_reverse_client( + const std::shared_ptr& io_context, + const metric_handler& handler, + const std::shared_ptr& logger); + + virtual ~agent_reverse_client(); + + void update(const agent_config::pointer& new_conf); +}; + +} // namespace com::centreon::engine::modules::opentelemetry::centreon_agent + +#endif diff --git 
a/engine/modules/opentelemetry/inc/com/centreon/engine/modules/opentelemetry/centreon_agent/agent_service.hh b/engine/modules/opentelemetry/inc/com/centreon/engine/modules/opentelemetry/centreon_agent/agent_service.hh new file mode 100644 index 00000000000..4f2d450c674 --- /dev/null +++ b/engine/modules/opentelemetry/inc/com/centreon/engine/modules/opentelemetry/centreon_agent/agent_service.hh @@ -0,0 +1,71 @@ +/* + * Copyright 2024 Centreon + * + * This file is part of Centreon Engine. + * + * Centreon Engine is free software: you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. + * + * Centreon Engine is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Centreon Engine. If not, see + * . 
+ */ + +#ifndef CCE_MOD_OTL_CENTREON_AGENT_AGENT_SERVICE_HH +#define CCE_MOD_OTL_CENTREON_AGENT_AGENT_SERVICE_HH + +#include "com/centreon/engine/modules/opentelemetry/centreon_agent/agent_config.hh" +#include "com/centreon/engine/modules/opentelemetry/centreon_agent/agent_impl.hh" + +namespace com::centreon::engine::modules::opentelemetry::centreon_agent { + +/** + * @brief implementation of the grpc service agent::AgentService. + * Only the callback version of Export (the one that returns a + * ServerBidiReactor) can be used, the synchronous one aborts the process. + * NOTE(review): _conf_m presumably protects _conf and objects are expected to + * be created with load() - confirm against agent_service.cc + */ +class agent_service : public agent::AgentService::Service, + public std::enable_shared_from_this { + std::shared_ptr _io_context; + agent_config::pointer _conf; + absl::Mutex _conf_m; + + metric_handler _metric_handler; + std::shared_ptr _logger; + + public: + agent_service(const std::shared_ptr& io_context, + const agent_config::pointer& conf, + const metric_handler& handler, + const std::shared_ptr& logger); + + void init(); + + static std::shared_ptr load( + const std::shared_ptr& io_context, + const agent_config::pointer& conf, + const metric_handler& handler, + const std::shared_ptr& logger); + + // disable synchronous version of this method + ::grpc::Status Export( + ::grpc::ServerContext* /*context*/, + ::grpc::ServerReaderWriter* /*stream*/) + override { + abort(); + return ::grpc::Status(::grpc::StatusCode::UNIMPLEMENTED, ""); + } + + ::grpc::ServerBidiReactor* + Export(::grpc::CallbackServerContext* context); + + void update(const agent_config::pointer& conf); + + static void shutdown_all_accepted(); +}; + +} // namespace com::centreon::engine::modules::opentelemetry::centreon_agent + +#endif diff --git a/engine/modules/opentelemetry/inc/com/centreon/engine/modules/opentelemetry/centreon_agent/to_agent_connector.hh b/engine/modules/opentelemetry/inc/com/centreon/engine/modules/opentelemetry/centreon_agent/to_agent_connector.hh new file mode 100644 index 00000000000..a593db30ba6 --- /dev/null +++ b/engine/modules/opentelemetry/inc/com/centreon/engine/modules/opentelemetry/centreon_agent/to_agent_connector.hh @@ -0,0 +1,79 @@ +/* + * Copyright 2024 Centreon + * + * This file is part of Centreon 
Engine. + * + * Centreon Engine is free software: you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. + * + * Centreon Engine is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Centreon Engine. If not, see + * . + */ + +#ifndef CCE_MOD_OTL_CENTREON_AGENT_AGENT_CLIENT_HH +#define CCE_MOD_OTL_CENTREON_AGENT_AGENT_CLIENT_HH + +#include "centreon_agent/agent.grpc.pb.h" +#include "com/centreon/engine/modules/opentelemetry/centreon_agent/agent_config.hh" + +#include "com/centreon/common/grpc/grpc_client.hh" +#include "com/centreon/engine/modules/opentelemetry/otl_data_point.hh" + +namespace com::centreon::engine::modules::opentelemetry::centreon_agent { + +class agent_connection; + +/** + * @brief this class is used in case of reverse connection (the engine + * connects to the agent). + * It maintains one connection to the agent server and reconnects in case of + * failure. + * The current connection (_connection) is guarded by _connection_m. + * NOTE(review): objects are expected to be created with load() and + * on_error() is presumably the entry point of the reconnection - confirm + * against to_agent_connector.cc + * + */ +class to_agent_connector + : public common::grpc::grpc_client_base, + public std::enable_shared_from_this { + std::shared_ptr _io_context; + metric_handler _metric_handler; + agent_config::pointer _conf; + + bool _alive; + std::unique_ptr _stub; + + absl::Mutex _connection_m; + std::shared_ptr _connection ABSL_GUARDED_BY(_connection_m); + + public: + to_agent_connector(const grpc_config::pointer& agent_endpoint_conf, + const std::shared_ptr& io_context, + const agent_config::pointer& agent_conf, + const metric_handler& handler, + const std::shared_ptr& logger); + + virtual ~to_agent_connector(); + + virtual void start(); + + static std::shared_ptr load( + const grpc_config::pointer& agent_endpoint_conf, + const std::shared_ptr& io_context, + const agent_config::pointer& 
agent_conf, + const metric_handler& handler, + const std::shared_ptr& logger); + + void refresh_agent_configuration_if_needed( + const agent_config::pointer& new_conf); + + virtual void shutdown(); + + void on_error(); +}; + +} // namespace com::centreon::engine::modules::opentelemetry::centreon_agent + +#endif diff --git a/engine/modules/opentelemetry/inc/com/centreon/engine/modules/opentelemetry/conf_helper.hh b/engine/modules/opentelemetry/inc/com/centreon/engine/modules/opentelemetry/conf_helper.hh new file mode 100644 index 00000000000..c3a0456eeae --- /dev/null +++ b/engine/modules/opentelemetry/inc/com/centreon/engine/modules/opentelemetry/conf_helper.hh @@ -0,0 +1,100 @@ +/** + * Copyright 2024 Centreon + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + * For more information : contact@centreon.com + */ +#ifndef CCE_MOD_CONF_HELPER_OPENTELEMETRY_HH +#define CCE_MOD_CONF_HELPER_OPENTELEMETRY_HH + +#include "com/centreon/engine/host.hh" +#include "com/centreon/engine/macros.hh" +#include "com/centreon/engine/service.hh" + +#include "com/centreon/engine/commands/forward.hh" + +namespace com::centreon::engine::modules::opentelemetry { + +/** + * @brief extract the opentelemetry commands of a host and of its services. + * A check command is an opentelemetry one if its type is otel or if it's a + * forward command whose sub command type is otel. + * This function must be called from engine main thread, not grpc ones + * + * @tparam command_handler callback called on every opentelemetry command found + * @param host_name name of the host supervised by the agent or telegraf + * @param handler called as handler(command name, command line, service + * description, logger), service description is empty for the host check, + * returned values are or-ed to compute the result + * @param logger used to log unknown host and checks that don't use + * opentelemetry, it's also passed to handler + * @return true at least one call to handler has returned true + * @return false host is unknown, no opentelemetry command was found or + * handler has never returned true + */ +template +bool get_otel_commands(const std::string& host_name, + command_handler&& handler, + const std::shared_ptr& logger) { + auto use_otl_command = [](const checkable& to_test) -> bool { + // a checkable without resolved check command can't use opentelemetry + if (!to_test.get_check_command_ptr()) + return false; + if (to_test.get_check_command_ptr()->get_type() == + commands::command::e_type::otel) + return true; + if (to_test.get_check_command_ptr()->get_type() == + commands::command::e_type::forward) { + return std::static_pointer_cast( + to_test.get_check_command_ptr()) + ->get_sub_command() + ->get_type() == commands::command::e_type::otel; + } + return false; + }; + + bool ret = false; + + auto hst_iter = host::hosts.find(host_name); + if (hst_iter == host::hosts.end()) { + SPDLOG_LOGGER_ERROR(logger, "unknown host:{}", host_name); + return false; + } + std::shared_ptr hst = hst_iter->second; + // host check use otl? 
+ if (use_otl_command(*hst)) { + nagios_macros* macros(get_global_macros()); + + ret |= handler(hst->check_command(), hst->get_check_command_line(macros), + "", logger); + clear_volatile_macros_r(macros); + } else { + SPDLOG_LOGGER_DEBUG( + logger, "host {} doesn't use opentelemetry to do his check", host_name); + } + // services of host + auto serv_iter = service::services_by_id.lower_bound({hst->host_id(), 0}); + for (; serv_iter != service::services_by_id.end() && + serv_iter->first.first == hst->host_id(); + ++serv_iter) { + std::shared_ptr serv = serv_iter->second; + if (use_otl_command(*serv)) { + nagios_macros* macros(get_global_macros()); + ret |= + handler(serv->check_command(), serv->get_check_command_line(macros), + serv->name(), logger); + clear_volatile_macros_r(macros); + } else { + SPDLOG_LOGGER_DEBUG( + logger, + "host {} service {} doesn't use opentelemetry to do his check", + host_name, serv->name()); + } + } + return ret; +} + +} // namespace com::centreon::engine::modules::opentelemetry +#endif diff --git a/engine/modules/opentelemetry/inc/com/centreon/engine/modules/opentelemetry/grpc_config.hh b/engine/modules/opentelemetry/inc/com/centreon/engine/modules/opentelemetry/grpc_config.hh index a31149670f7..8775f42c420 100644 --- a/engine/modules/opentelemetry/inc/com/centreon/engine/modules/opentelemetry/grpc_config.hh +++ b/engine/modules/opentelemetry/inc/com/centreon/engine/modules/opentelemetry/grpc_config.hh @@ -43,6 +43,14 @@ class grpc_config : public common::grpc::grpc_config { return !(*this == right); } }; + +struct grpc_config_compare { + bool operator()(const grpc_config::pointer& left, + const grpc_config::pointer& right) const { + return left->compare(*right) < 0; + } +}; + } // namespace com::centreon::engine::modules::opentelemetry #endif // !CCE_MOD_OTL_SERVER_GRPC_CONFIG_HH diff --git a/engine/modules/opentelemetry/inc/com/centreon/engine/modules/opentelemetry/open_telemetry.hh 
b/engine/modules/opentelemetry/inc/com/centreon/engine/modules/opentelemetry/open_telemetry.hh index b558b07c4e4..aa601e0c951 100644 --- a/engine/modules/opentelemetry/inc/com/centreon/engine/modules/opentelemetry/open_telemetry.hh +++ b/engine/modules/opentelemetry/inc/com/centreon/engine/modules/opentelemetry/open_telemetry.hh @@ -22,6 +22,7 @@ #include "com/centreon/engine/commands/otel_interface.hh" +#include "centreon_agent/agent_reverse_client.hh" #include "data_point_fifo_container.hh" #include "host_serv_extractor.hh" #include "otl_check_result_builder.hh" @@ -48,6 +49,7 @@ class open_telemetry : public commands::otel::open_telemetry_base { asio::system_timer _second_timer; std::shared_ptr _otl_server; std::shared_ptr _telegraf_conf_server; + std::unique_ptr _agent_reverse_client; using cmd_line_to_extractor_map = absl::btree_map>; @@ -98,7 +100,9 @@ class open_telemetry : public commands::otel::open_telemetry_base { const telegraf::conf_server_config::pointer& conf); protected: - virtual void _create_otl_server(const grpc_config::pointer& server_conf); + virtual void _create_otl_server( + const grpc_config::pointer& server_conf, + const centreon_agent::agent_config::pointer& agent_conf); void _on_metric(const metric_request_ptr& metric); void _reload(); void _start_second_timer(); diff --git a/engine/modules/opentelemetry/inc/com/centreon/engine/modules/opentelemetry/otl_check_result_builder.hh b/engine/modules/opentelemetry/inc/com/centreon/engine/modules/opentelemetry/otl_check_result_builder.hh index 2c1d3526819..71b44670c3a 100644 --- a/engine/modules/opentelemetry/inc/com/centreon/engine/modules/opentelemetry/otl_check_result_builder.hh +++ b/engine/modules/opentelemetry/inc/com/centreon/engine/modules/opentelemetry/otl_check_result_builder.hh @@ -35,7 +35,10 @@ class data_point_fifo_container; class check_result_builder_config : public commands::otel::check_result_builder_config { public: - enum class converter_type { nagios_check_result_builder }; + 
enum class converter_type { + nagios_check_result_builder, + centreon_agent_check_result_builder + }; private: const converter_type _type; diff --git a/engine/modules/opentelemetry/inc/com/centreon/engine/modules/opentelemetry/otl_config.hh b/engine/modules/opentelemetry/inc/com/centreon/engine/modules/opentelemetry/otl_config.hh index 16276151653..677706b663c 100644 --- a/engine/modules/opentelemetry/inc/com/centreon/engine/modules/opentelemetry/otl_config.hh +++ b/engine/modules/opentelemetry/inc/com/centreon/engine/modules/opentelemetry/otl_config.hh @@ -19,14 +19,19 @@ #ifndef CCE_MOD_OTL_SERVER_OTLCONFIG_HH #define CCE_MOD_OTL_SERVER_OTLCONFIG_HH +#include "centreon_agent/agent_config.hh" #include "grpc_config.hh" #include "telegraf/conf_server.hh" namespace com::centreon::engine::modules::opentelemetry { + class otl_config { + private: grpc_config::pointer _grpc_conf; telegraf::conf_server_config::pointer _telegraf_conf_server_config; + centreon_agent::agent_config::pointer _centreon_agent_config; + int _max_length_grpc_log = -1; // all otel are logged if negative bool _json_grpc_log = false; // if true, otel object are logged in json // format instead of protobuf debug format @@ -46,6 +51,10 @@ class otl_config { return _telegraf_conf_server_config; } + centreon_agent::agent_config::pointer get_centreon_agent_config() const { + return _centreon_agent_config; + } + int get_max_length_grpc_log() const { return _max_length_grpc_log; } bool get_json_grpc_log() const { return _json_grpc_log; } diff --git a/engine/modules/opentelemetry/inc/com/centreon/engine/modules/opentelemetry/otl_data_point.hh b/engine/modules/opentelemetry/inc/com/centreon/engine/modules/opentelemetry/otl_data_point.hh index 1e0ca128278..bad1bc2236e 100644 --- a/engine/modules/opentelemetry/inc/com/centreon/engine/modules/opentelemetry/otl_data_point.hh +++ b/engine/modules/opentelemetry/inc/com/centreon/engine/modules/opentelemetry/otl_data_point.hh @@ -88,6 +88,13 @@ using 
metric_request_ptr = std::shared_ptr<::opentelemetry::proto::collector::metrics::v1:: ExportMetricsServiceRequest>; +/** + * @brief the server grpc model used is the callback model + * So you need to give to the server this handler to handle incoming requests + * + */ +using metric_handler = std::function; + /** * @brief some metrics will be computed and other not * This bean represents a DataPoint, it embeds all ExportMetricsServiceRequest @@ -113,6 +120,8 @@ class otl_data_point { const google::protobuf::Message& _data_point; const ::google::protobuf::RepeatedPtrField< ::opentelemetry::proto::common::v1::KeyValue>& _data_point_attributes; + const ::google::protobuf::RepeatedPtrField< + ::opentelemetry::proto::metrics::v1::Exemplar>& _exemplars; uint64_t _nano_timestamp; data_point_type _type; double _value; @@ -176,6 +185,12 @@ class otl_data_point { double get_value() const { return _value; } + const ::google::protobuf::RepeatedPtrField< + ::opentelemetry::proto::metrics::v1::Exemplar>& + get_exemplars() const { + return _exemplars; + } + template static void extract_data_points(const metric_request_ptr& metrics, data_point_handler&& handler); diff --git a/engine/modules/opentelemetry/inc/com/centreon/engine/modules/opentelemetry/otl_fmt.hh b/engine/modules/opentelemetry/inc/com/centreon/engine/modules/opentelemetry/otl_fmt.hh index c50048a6d0b..40c2facfd18 100644 --- a/engine/modules/opentelemetry/inc/com/centreon/engine/modules/opentelemetry/otl_fmt.hh +++ b/engine/modules/opentelemetry/inc/com/centreon/engine/modules/opentelemetry/otl_fmt.hh @@ -63,6 +63,72 @@ struct formatter< } }; +template <> +struct formatter + : formatter { + /** + * @brief dump a message received from the agent. + * It uses the settings of the ExportMetricsServiceRequest formatter: + * the message is dumped in json if json_grpc_format is set, otherwise with + * ShortDebugString, and the dump is truncated to max_length_log characters + * if max_length_log > 0 (otherwise the whole message is dumped) + * + */ + template + auto format(const com::centreon::agent::MessageFromAgent& p, + FormatContext& ctx) const -> decltype(ctx.out()) { + using otl_formatter = + formatter< 
+ ::opentelemetry::proto::collector::metrics::v1:: + ExportMetricsServiceRequest>; + + if (otl_formatter::json_grpc_format) { + std::string output; + google::protobuf::util::MessageToJsonString(p, &output); + return formatter::format( + otl_formatter::max_length_log > 0 + ? output.substr(0, otl_formatter::max_length_log) + : output, + ctx); + } else { + return formatter::format( + otl_formatter::max_length_log > 0 + ? p.ShortDebugString().substr(0, otl_formatter::max_length_log) + : p.ShortDebugString(), + ctx); + } + } +}; + +template <> +struct formatter + : formatter { + /** + * @brief dump a message sent to the agent. + * It uses the settings of the ExportMetricsServiceRequest formatter: + * the message is dumped in json if json_grpc_format is set, otherwise with + * ShortDebugString, and the dump is truncated to max_length_log characters + * if max_length_log > 0 (otherwise the whole message is dumped) + * + */ + template + auto format(const com::centreon::agent::MessageToAgent& p, + FormatContext& ctx) const -> decltype(ctx.out()) { + using otl_formatter = + formatter< ::opentelemetry::proto::collector::metrics::v1:: + ExportMetricsServiceRequest>; + + if (otl_formatter::json_grpc_format) { + std::string output; + google::protobuf::util::MessageToJsonString(p, &output); + return formatter::format( + otl_formatter::max_length_log > 0 + ? output.substr(0, otl_formatter::max_length_log) + : output, + ctx); + } else { + return formatter::format( + otl_formatter::max_length_log > 0 + ? 
+ p.ShortDebugString().substr(0, otl_formatter::max_length_log) + : p.ShortDebugString(), + ctx); + } + } +}; + }; // namespace fmt #endif diff --git a/engine/modules/opentelemetry/inc/com/centreon/engine/modules/opentelemetry/otl_server.hh b/engine/modules/opentelemetry/inc/com/centreon/engine/modules/opentelemetry/otl_server.hh index 0dd766bb982..935aac30d9c 100644 --- a/engine/modules/opentelemetry/inc/com/centreon/engine/modules/opentelemetry/otl_server.hh +++ b/engine/modules/opentelemetry/inc/com/centreon/engine/modules/opentelemetry/otl_server.hh @@ -24,6 +24,7 @@ #include "otl_data_point.hh" #include "com/centreon/common/grpc/grpc_server.hh" +#include "com/centreon/engine/modules/opentelemetry/centreon_agent/agent_service.hh" namespace com::centreon::engine::modules::opentelemetry { @@ -31,13 +32,6 @@ namespace detail { class metric_service; }; -/** - * @brief the server grpc model used is the callback model - * So you need to give to the server this handler to handle incoming requests - * - */ -using metric_handler = std::function; - /** * @brief grpc metric receiver server * must be constructed with load method @@ -45,8 +39,12 @@ using metric_handler = std::function; */ class otl_server : public common::grpc::grpc_server_base { std::shared_ptr _service; + std::shared_ptr _agent_service; + absl::Mutex _protect; - otl_server(const grpc_config::pointer& conf, + otl_server(const std::shared_ptr& io_context, + const grpc_config::pointer& conf, + const centreon_agent::agent_config::pointer& agent_config, const metric_handler& handler, const std::shared_ptr& logger); void start(); @@ -56,9 +54,15 @@ class otl_server : public common::grpc::grpc_server_base { ~otl_server(); - static pointer load(const grpc_config::pointer& conf, - const metric_handler& handler, - const std::shared_ptr& logger); + static pointer load( + const std::shared_ptr& io_context, + const grpc_config::pointer& conf, + const centreon_agent::agent_config::pointer& agent_config, + const 
metric_handler& handler, + const std::shared_ptr& logger); + + void update_agent_config( + const centreon_agent::agent_config::pointer& agent_config); }; } // namespace com::centreon::engine::modules::opentelemetry diff --git a/engine/modules/opentelemetry/inc/com/centreon/engine/modules/opentelemetry/telegraf/conf_server.hh b/engine/modules/opentelemetry/inc/com/centreon/engine/modules/opentelemetry/telegraf/conf_server.hh index 1e6a94b9f6b..989af594b33 100644 --- a/engine/modules/opentelemetry/inc/com/centreon/engine/modules/opentelemetry/telegraf/conf_server.hh +++ b/engine/modules/opentelemetry/inc/com/centreon/engine/modules/opentelemetry/telegraf/conf_server.hh @@ -75,7 +75,7 @@ class conf_session : public connection_class { void on_receive_request(const std::shared_ptr& request); void answer_to_request(const std::shared_ptr& request, - std::vector&& host_list); + const std::string& host); bool _get_commands(const std::string& host_name, std::string& request_body); diff --git a/engine/modules/opentelemetry/inc/com/centreon/engine/modules/opentelemetry/telegraf/nagios_check_result_builder.hh b/engine/modules/opentelemetry/inc/com/centreon/engine/modules/opentelemetry/telegraf/nagios_check_result_builder.hh index 77bcd34b533..8ffc0392141 100644 --- a/engine/modules/opentelemetry/inc/com/centreon/engine/modules/opentelemetry/telegraf/nagios_check_result_builder.hh +++ b/engine/modules/opentelemetry/inc/com/centreon/engine/modules/opentelemetry/telegraf/nagios_check_result_builder.hh @@ -20,6 +20,7 @@ #define CCE_MOD_OTL_NAGIOS_CONVERTER_HH namespace com::centreon::engine::modules::opentelemetry::telegraf { + /** * @brief telegraf accept to use nagios plugins * This converter parse metrics special naming to rebuild original check_result diff --git a/engine/modules/opentelemetry/precomp_inc/precomp.hh b/engine/modules/opentelemetry/precomp_inc/precomp.hh index 67a56f7e324..de025ed071d 100644 --- a/engine/modules/opentelemetry/precomp_inc/precomp.hh +++ 
b/engine/modules/opentelemetry/precomp_inc/precomp.hh @@ -25,6 +25,7 @@ #include #include +#include #include #include #include diff --git a/engine/modules/opentelemetry/src/centreon_agent/agent_check_result_builder.cc b/engine/modules/opentelemetry/src/centreon_agent/agent_check_result_builder.cc new file mode 100644 index 00000000000..13bb39343e3 --- /dev/null +++ b/engine/modules/opentelemetry/src/centreon_agent/agent_check_result_builder.cc @@ -0,0 +1,168 @@ +/** + * Copyright 2024 Centreon + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + * For more information : contact@centreon.com + */ + +#include "data_point_fifo_container.hh" + +#include "otl_check_result_builder.hh" + +#include "centreon_agent/agent_check_result_builder.hh" + +using namespace com::centreon::engine::modules::opentelemetry::centreon_agent; + +namespace com::centreon::engine::modules::opentelemetry::centreon_agent:: + detail { + +/** + * @brief thresholds and bounds of one metric. + * They are filled from the exemplars of a data point (apply_exemplar) and + * written after the value of the metric in the perfdata string + * (append_to_string) + * + */ +struct perf_data { + std::optional warning_le, warning_lt, warning_ge, warning_gt; + std::optional critical_le, critical_lt, critical_ge, critical_gt; + std::optional min, max; + + void apply_exemplar( + const ::opentelemetry::proto::metrics::v1::Exemplar& exemplar); + + void append_to_string(std::string* to_append); + + static const absl::flat_hash_map perf_data::*> + _suffix_to_value; +}; + +/** + * @brief key of the first filtered attribute of an exemplar => member of + * perf_data where the value of the exemplar is stored + * + */ +const absl::flat_hash_map perf_data::*> + perf_data::_suffix_to_value = {{"warn_le", &perf_data::warning_le}, + {"warn_lt", &perf_data::warning_lt}, + {"warn_ge", &perf_data::warning_ge}, + {"warn_gt", &perf_data::warning_gt}, + {"crit_le", &perf_data::critical_le}, + {"crit_lt", &perf_data::critical_lt}, + {"crit_ge", &perf_data::critical_ge}, + {"crit_gt", &perf_data::critical_gt}, + {"min", &perf_data::min}, + {"max", &perf_data::max}}; + +/** + * @brief store the as_double value of an exemplar in the member designated by + * the key of its first filtered attribute (warn_le, crit_gt, min...). + * An exemplar without filtered attribute or with an unknown key is ignored, + * the other filtered attributes are not read + * + * @param exemplar + */ +void perf_data::apply_exemplar( + const ::opentelemetry::proto::metrics::v1::Exemplar& exemplar) { + if (!exemplar.filtered_attributes().empty()) { + auto search = + _suffix_to_value.find(exemplar.filtered_attributes().begin()->key()); + if (search != _suffix_to_value.end()) { + this->*search->second = exemplar.as_double(); + } + } +} + +/** + * @brief append warning;critical;min;max to a string, a field that is not set + * is left empty. + * For warning and critical, members are tested in this order and only the + * first case that matches is written: + * - le is set: @le: followed by ge if it's set + * - ge is set: @~:ge + * - lt is set: lt: followed by gt if it's set + * - gt is set: ~:gt + * + * @param to_append string to complete + */ +void perf_data::append_to_string(std::string* to_append) { + if (warning_le) { + absl::StrAppend(to_append, "@", *warning_le, ":"); + if (warning_ge) + absl::StrAppend(to_append, *warning_ge); + } else if (warning_ge) { + absl::StrAppend(to_append, "@~:", *warning_ge); + } else if (warning_lt) { + absl::StrAppend(to_append, *warning_lt, ":"); + if (warning_gt) + absl::StrAppend(to_append, *warning_gt); + } else if (warning_gt) { + 
absl::StrAppend(to_append, "~:", *warning_gt); + } + to_append->push_back(';'); + if (critical_le) { + absl::StrAppend(to_append, "@", *critical_le, ":"); + if (critical_ge) + absl::StrAppend(to_append, *critical_ge); + } else if (critical_ge) { + absl::StrAppend(to_append, "@~:", *critical_ge); + } else if (critical_lt) { + absl::StrAppend(to_append, *critical_lt, ":"); + if (critical_gt) + absl::StrAppend(to_append, *critical_gt); + } else if (critical_gt) { + absl::StrAppend(to_append, "~:", *critical_gt); + } + to_append->push_back(';'); + if (min) + absl::StrAppend(to_append, *min); + to_append->push_back(';'); + if (max) + absl::StrAppend(to_append, *max); +} + +} // namespace + // com::centreon::engine::modules::opentelemetry::centreon_agent::detail + +/** + * @brief build a check result from the metrics of a service. + * The last sample of the metric named status gives the exit code (value), + * the output (description of the metric) and the time of the result (nano + * timestamp of the sample). + * Then, for every metric that has a data point at this timestamp, + * name=value unit and the thresholds stored in the exemplars of the data + * point are appended to the output after a '|'. + * clean_oldest(last_time) is called on every fifo that has been read and + * empty fifos are removed from fifos + * + * @param fifos all metrics for a given service + * @param res check result to fill + * @return true a status sample has been found and res is filled + * @return false no status metric, no sample in its fifo or a last timestamp + * equal to zero + */ +bool agent_check_result_builder::_build_result_from_metrics( + metric_name_to_fifo& fifos, + commands::result& res) { + // first we search last state timestamp + uint64_t last_time = 0; + + for (auto& metric_to_fifo : fifos) { + if (metric_to_fifo.first == "status") { + auto& fifo = metric_to_fifo.second.get_fifo(); + if (!fifo.empty()) { + const auto& last_sample = *fifo.rbegin(); + last_time = last_sample.get_nano_timestamp(); + res.exit_code = last_sample.get_value(); + // output of plugins is stored in description metric field + res.output = last_sample.get_metric().description(); + metric_to_fifo.second.clean_oldest(last_time); + } + break; + } + } + if (!last_time) { + return false; + } + res.command_id = get_command_id(); + res.exit_status = process::normal; + res.end_time = res.start_time = + timestamp(last_time / 1000000000, (last_time / 1000) % 1000000); + + res.output.push_back('|'); + + for (auto& metric_to_fifo : fifos) { + auto& fifo = metric_to_fifo.second.get_fifo(); + auto data_pt_search = fifo.find(last_time); + if (data_pt_search != fifo.end()) { + 
res.output.push_back(' '); + const otl_data_point& data_pt = *data_pt_search; + absl::StrAppend(&res.output, metric_to_fifo.first, "=", + data_pt.get_value(), data_pt.get_metric().unit(), ";"); + + // all other metric value (warning_lt, critical_gt, min... are stored in + // exemplars) + detail::perf_data to_append; + for (const auto& exemplar : data_pt.get_exemplars()) { + to_append.apply_exemplar(exemplar); + } + to_append.append_to_string(&res.output); + } + metric_to_fifo.second.clean_oldest(last_time); + } + + data_point_fifo_container::clean_empty_fifos(fifos); + + return true; +} diff --git a/engine/modules/opentelemetry/src/centreon_agent/agent_config.cc b/engine/modules/opentelemetry/src/centreon_agent/agent_config.cc new file mode 100644 index 00000000000..dd03c8aaadf --- /dev/null +++ b/engine/modules/opentelemetry/src/centreon_agent/agent_config.cc @@ -0,0 +1,125 @@ +/** + * Copyright 2024 Centreon + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + * For more information : contact@centreon.com + */ + +#include "com/centreon/common/rapidjson_helper.hh" + +#include "centreon_agent/agent_config.hh" + +#include "com/centreon/exceptions/msg_fmt.hh" + +using namespace com::centreon::engine::modules::opentelemetry::centreon_agent; +using namespace com::centreon::common; + +static constexpr std::string_view _config_schema(R"( +{ + "$schema": "http://json-schema.org/draft-04/schema#", + "title": "centreon agent config", + "properties": { + "check_interval": { + "description": "interval in seconds between two checks", + "type": "integer", + "minimum": 10 + }, + "max_concurrent_checks": { + "description": "maximum of running checks at the same time", + "type": "integer", + "minimum": 1 + }, + "export_period": { + "description": "period in second of agent metric export", + "type": "integer", + "minimum": 10 + }, + "check_timeout": { + "description": "check running timeout", + "type": "integer", + "minimum": 1 + }, + "reverse_connections": { + "description": "array of agent endpoints (reverse mode, engine connects to centreon-agent) ", + "type": "array", + "items": { + "type" : "object" + } + } + }, + "type": "object" +} +)"); + +/** + * @brief Construct a new agent config from a json object. + * The object is first validated against _config_schema, then fields are + * read with these default values: check_interval 60, max_concurrent_checks + * 100, export_period 60 and check_timeout 30. + * Fields are read with the names declared in the schema (check_timeout, + * reverse_connections). The names used before (_check_timeout, + * agent_reverse_server) are only read when the schema name is absent + * + * @param json_config_v json object that contains agent configuration + */ +agent_config::agent_config(const rapidjson::Value& json_config_v) { + static json_validator validator(_config_schema); + + rapidjson_helper file_content(json_config_v); + + file_content.validate(validator); + + _check_interval = file_content.get_unsigned("check_interval", 60); + _max_concurrent_checks = + file_content.get_unsigned("max_concurrent_checks", 100); + _export_period = file_content.get_unsigned("export_period", 60); + // schema name first, the old misspelled name is only a fallback + _check_timeout = file_content.get_unsigned( + "check_timeout", file_content.get_unsigned("_check_timeout", 30)); + + // same rule for the array of agent endpoints + const char* reverse_key = file_content.has_member("reverse_connections") + ? "reverse_connections" + : "agent_reverse_server"; + if (file_content.has_member(reverse_key)) { + const auto& reverse_array = file_content.get_member(reverse_key); + for (auto conf_iter = reverse_array.Begin(); + conf_iter != reverse_array.End(); ++conf_iter) { + _agent_grpc_reverse_conf.insert( + 
std::make_shared(*conf_iter)); + } + } +} + +agent_config::agent_config(uint32_t check_interval, + uint32_t max_concurrent_checks, + uint32_t export_period, + uint32_t check_timeout) + : _check_interval(check_interval), + _max_concurrent_checks(max_concurrent_checks), + _export_period(export_period), + _check_timeout(check_timeout) {} + +agent_config::agent_config( + uint32_t check_interval, + uint32_t max_concurrent_checks, + uint32_t export_period, + uint32_t check_timeout, + const std::initializer_list& endpoints) + : _agent_grpc_reverse_conf(endpoints), + _check_interval(check_interval), + _max_concurrent_checks(max_concurrent_checks), + _export_period(export_period), + _check_timeout(check_timeout) {} + +bool agent_config::operator==(const agent_config& right) const { + if (_check_interval != right._check_interval || + _max_concurrent_checks != right._max_concurrent_checks || + _export_period != right._export_period || + _check_timeout != right._check_timeout || + _agent_grpc_reverse_conf.size() != right._agent_grpc_reverse_conf.size()) + return false; + + for (auto rev_conf_left = _agent_grpc_reverse_conf.begin(), + rev_conf_right = right._agent_grpc_reverse_conf.begin(); + rev_conf_left != _agent_grpc_reverse_conf.end(); + ++rev_conf_left, ++rev_conf_right) { + if (**rev_conf_left != **rev_conf_right) + return false; + } + return true; +} diff --git a/engine/modules/opentelemetry/src/centreon_agent/agent_impl.cc b/engine/modules/opentelemetry/src/centreon_agent/agent_impl.cc index a6038111221..ef4cff251b0 100644 --- a/engine/modules/opentelemetry/src/centreon_agent/agent_impl.cc +++ b/engine/modules/opentelemetry/src/centreon_agent/agent_impl.cc @@ -17,6 +17,363 @@ * . 
*/ +#include + #include "centreon_agent/agent_impl.hh" +#include "conf_helper.hh" +#include "otl_fmt.hh" + +#include "com/centreon/engine/command_manager.hh" + using namespace com::centreon::engine::modules::opentelemetry::centreon_agent; + +/** + * @brief when BiReactor::OnDone is called by grpc layers, we should delete + * this. But this object is even used by others. + * So it's stored in this container and just removed from this container when + * OnDone is called + * This container is also used to push configuration changes to agent + * + * @tparam bireactor_class + */ +template +std::set>> + agent_impl::_instances; + +template +absl::Mutex agent_impl::_instances_m; + +/** + * @brief Construct a new agent impl::agent impl object + * + * @tparam bireactor_class + * @param io_context + * @param class_name + * @param handler handler that will process received metrics + * @param logger + */ +template +agent_impl::agent_impl( + const std::shared_ptr& io_context, + const std::string_view class_name, + const agent_config::pointer& conf, + const metric_handler& handler, + const std::shared_ptr& logger) + : _io_context(io_context), + _class_name(class_name), + _conf(conf), + _metric_handler(handler), + _logger(logger), + _write_pending(false), + _alive(true) { + SPDLOG_LOGGER_DEBUG(logger, "create {} this={:p}", _class_name, + static_cast(this)); +} + +/** + * @brief Destroy the agent impl::agent impl object + * + * @tparam bireactor_class + */ +template +agent_impl::~agent_impl() { + SPDLOG_LOGGER_DEBUG(_logger, "delete {} this={:p}", _class_name, + static_cast(this)); +} + +/** + * @brief just call _calc_and_send_config_if_needed in main engine thread + * + * @tparam bireactor_class + */ +template +void agent_impl::calc_and_send_config_if_needed( + const agent_config::pointer& new_conf) { + { + absl::MutexLock l(&_protect); + _conf = new_conf; + } + auto to_call = std::packaged_task( + [me = std::enable_shared_from_this>:: + shared_from_this()]() mutable -> int32_t { 
+ // then we are in the main thread + // services, hosts and commands are stable + me->_calc_and_send_config_if_needed(); + return 0; + }); + command_manager::instance().enqueue(std::move(to_call)); +} + +/** + * @brief static method used to push new configuration to all agents + * + * @tparam bireactor_class + */ +template +void agent_impl::all_agent_calc_and_send_config_if_needed( + const agent_config::pointer& new_conf) { + absl::MutexLock l(&_instances_m); + for (auto& instance : _instances) { + instance->calc_and_send_config_if_needed(new_conf); + } +} + +static bool add_command_to_agent_conf( + const std::string& cmd_name, + const std::string& cmd_line, + const std::string& service, + com::centreon::agent::AgentConfiguration* cnf, + const std::shared_ptr& logger, + const std::string& peer) { + std::string plugins_cmdline = boost::trim_copy(cmd_line); + + if (plugins_cmdline.empty()) { + SPDLOG_LOGGER_ERROR(logger, + "agent: {} serv: {}, no plugins cmd_line found in {}", + peer, service, cmd_line); + return false; + } + + SPDLOG_LOGGER_TRACE(logger, "agent: {}, serv: {}, cmd {} plugins cmd_line {}", + peer, service, cmd_name, cmd_line); + + com::centreon::agent::Service* serv = cnf->add_services(); + serv->set_service_description(service); + serv->set_command_name(cmd_name); + serv->set_command_line(plugins_cmdline); + + return true; +} + +/** + * @brief this function must be called in the engine main thread + * _conf is read under _protect as calc_and_send_config_if_needed replaces it under this mutex + * @tparam bireactor_class + */ +template +void agent_impl::_calc_and_send_config_if_needed() { + std::shared_ptr new_conf = + std::make_shared(); + { + absl::MutexLock l(&_protect); + agent::AgentConfiguration* cnf = new_conf->mutable_config(); + cnf->set_check_interval(_conf->get_check_interval()); + cnf->set_check_timeout(_conf->get_check_timeout()); + cnf->set_export_period(_conf->get_export_period()); + cnf->set_max_concurrent_checks(_conf->get_max_concurrent_checks()); + cnf->set_use_exemplar(true); + if (!_alive) { + return; + } + if
(_agent_info) { + const std::string& peer = get_peer(); + bool at_least_one_command_found = get_otel_commands( + _agent_info->init().host(), + [cnf, &peer](const std::string& cmd_name, const std::string& cmd_line, + const std::string& service, + const std::shared_ptr& logger) { + return add_command_to_agent_conf(cmd_name, cmd_line, service, cnf, + logger, peer); + }, + _logger); + if (!at_least_one_command_found) { + SPDLOG_LOGGER_ERROR(_logger, "no command found for agent {}", + get_peer()); + } + } + if (!_last_sent_config || + !::google::protobuf::util::MessageDifferencer::Equals( + *cnf, _last_sent_config->config())) { + _last_sent_config = new_conf; + } else { + new_conf.reset(); + } + } + if (new_conf) { + SPDLOG_LOGGER_DEBUG(_logger, "send conf to {}", get_peer()); + _write(new_conf); + } +} + +template +void agent_impl::on_request( + const std::shared_ptr& request) { + agent_config::pointer agent_conf; + if (request->has_init()) { + { + absl::MutexLock l(&_protect); + _agent_info = request; + agent_conf = _conf; + _last_sent_config.reset(); + } + SPDLOG_LOGGER_DEBUG(_logger, "init from {}", get_peer()); + calc_and_send_config_if_needed(agent_conf); + } + if (request->has_otel_request()) { + metric_request_ptr received(request->unsafe_arena_release_otel_request()); + _metric_handler(received); + } +} + +template +void agent_impl::_write( + const std::shared_ptr& request) { + { + absl::MutexLock l(&_protect); + if (!_alive) { + return; + } + _write_queue.push_back(request); + } + start_write(); +} + +template +void agent_impl::register_stream( + const std::shared_ptr& strm) { + absl::MutexLock l(&_instances_m); + _instances.insert(strm); +} + +template +void agent_impl::start_read() { + absl::MutexLock l(&_protect); + if (!_alive) { + return; + } + std::shared_ptr to_read; + if (_read_current) { + return; + } + to_read = _read_current = std::make_shared(); + bireactor_class::StartRead(to_read.get()); +} + +template +void agent_impl::OnReadDone(bool ok) { + if 
(ok) { + std::shared_ptr readden; + { + absl::MutexLock l(&_protect); + SPDLOG_LOGGER_TRACE(_logger, "{:p} {} receive from {}: {}", + static_cast(this), _class_name, + get_peer(), *_read_current); + readden = _read_current; + _read_current.reset(); + } + start_read(); + on_request(readden); + } else { + SPDLOG_LOGGER_ERROR(_logger, "{:p} {} fail read from {}", + static_cast(this), _class_name, get_peer()); + on_error(); + this->shutdown(); + } +} + +template +void agent_impl::start_write() { + std::shared_ptr to_send; + { + absl::MutexLock l(&_protect); + if (!_alive || _write_pending || _write_queue.empty()) { + return; + } + to_send = _write_queue.front(); + _write_pending = true; + } + SPDLOG_LOGGER_TRACE(_logger, "{:p} {} send to {}: {}", + static_cast(this), _class_name, get_peer(), + *to_send); + bireactor_class::StartWrite(to_send.get()); +} + +template +void agent_impl::OnWriteDone(bool ok) { + if (ok) { + { + absl::MutexLock l(&_protect); + _write_pending = false; + SPDLOG_LOGGER_TRACE(_logger, "{:p} {} {} sent", + static_cast(this), _class_name, + **_write_queue.begin()); + _write_queue.pop_front(); + } + start_write(); + } else { + SPDLOG_LOGGER_ERROR(_logger, "{:p} {} fail write to stream", + static_cast(this), _class_name); + on_error(); + this->shutdown(); + } +} + +template +void agent_impl::OnDone() { + /**grpc has a bug, sometimes if we delete this class in this handler as it is + * described in examples, it also deletes used channel and does a pthread_join + * of the current thread witch go to a EDEADLOCK error and call grpc::Crash. 
+ * So we use an asio thread to do the job + */ + _io_context->post([me = std::enable_shared_from_this< + agent_impl>::shared_from_this(), + logger = _logger]() { + absl::MutexLock l(&_instances_m); + SPDLOG_LOGGER_DEBUG(logger, "{:p} server::OnDone()", + static_cast(me.get())); + _instances.erase(std::static_pointer_cast>(me)); + }); +} + +template +void agent_impl::OnDone(const ::grpc::Status& status) { + /**grpc has a bug, sometimes if we delete this class in this handler as it is + * described in examples, it also deletes used channel and does a + * pthread_join of the current thread which leads to an EDEADLK error and a + * call to grpc::Crash. So we use an asio thread to do the job + */ + _io_context->post([me = std::enable_shared_from_this< + agent_impl>::shared_from_this(), + status, logger = _logger]() { + absl::MutexLock l(&_instances_m); + if (status.ok()) { + SPDLOG_LOGGER_DEBUG(logger, "{:p} client::OnDone({}) {}", + static_cast(me.get()), status.error_message(), + status.error_details()); + } else { + SPDLOG_LOGGER_ERROR(logger, "{:p} client::OnDone({}) {}", + static_cast(me.get()), status.error_message(), + status.error_details()); + } + _instances.erase(std::static_pointer_cast>(me)); + }); +} + +template +void agent_impl::shutdown() { + SPDLOG_LOGGER_DEBUG(_logger, "{:p} {}::shutdown", static_cast(this), + _class_name); +} + +template +void agent_impl::shutdown_all() { + std::set> to_shutdown; + { + absl::MutexLock l(&_instances_m); + to_shutdown.swap(_instances); // swap guarantees that _instances is left empty + } + for (const auto& conn : to_shutdown) { + conn->shutdown(); + } +} + +namespace com::centreon::engine::modules::opentelemetry::centreon_agent { + +template class agent_impl< + ::grpc::ClientBidiReactor>; + +template class agent_impl< + ::grpc::ServerBidiReactor>; + +} // namespace com::centreon::engine::modules::opentelemetry::centreon_agent \ No newline at end of file diff --git a/engine/modules/opentelemetry/src/centreon_agent/agent_reverse_client.cc
b/engine/modules/opentelemetry/src/centreon_agent/agent_reverse_client.cc new file mode 100644 index 00000000000..5f659b083e5 --- /dev/null +++ b/engine/modules/opentelemetry/src/centreon_agent/agent_reverse_client.cc @@ -0,0 +1,130 @@ +/* + * Copyright 2024 Centreon + * + * This file is part of Centreon Engine. + * + * Centreon Engine is free software: you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. + * + * Centreon Engine is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Centreon Engine. If not, see + * . + */ + +#include "centreon_agent/agent_reverse_client.hh" +#include "centreon_agent/to_agent_connector.hh" + +using namespace com::centreon::engine::modules::opentelemetry::centreon_agent; + +/** + * @brief Construct a new agent reverse client::agent reverse client object + * + * @param io_context + * @param handler handler that will process received metrics + * @param logger + */ +agent_reverse_client::agent_reverse_client( + const std::shared_ptr& io_context, + const metric_handler& handler, + const std::shared_ptr& logger) + : _io_context(io_context), _metric_handler(handler), _logger(logger) {} + +/** + * @brief Destroy the agent reverse client::agent reverse client object + * it also shutdown all connectors + * + */ +agent_reverse_client::~agent_reverse_client() { + absl::MutexLock l(&_agents_m); + for (auto& conn : _agents) { + conn.second->shutdown(); + } + _agents.clear(); +} + +/** + * @brief update agent list by doing a symmetric difference + * + * @param new_conf + */ +void agent_reverse_client::update(const agent_config::pointer& new_conf) { + absl::MutexLock l(&_agents_m); + + 
auto connection_iterator = _agents.begin(); + auto conf_iterator = new_conf->get_agent_grpc_reverse_conf().begin(); + + while (connection_iterator != _agents.end() && + conf_iterator != new_conf->get_agent_grpc_reverse_conf().end()) { + int compare_res = connection_iterator->first->compare(**conf_iterator); + if (compare_res > 0) { + connection_iterator = + _create_new_client_connection(*conf_iterator, new_conf); + ++connection_iterator; + ++conf_iterator; + } else if (compare_res < 0) { + _shutdown_connection(connection_iterator); + connection_iterator = _agents.erase(connection_iterator); + } else { + connection_iterator->second->refresh_agent_configuration_if_needed( + new_conf); + ++connection_iterator; + ++conf_iterator; + } + } + + while (connection_iterator != _agents.end()) { + _shutdown_connection(connection_iterator); + connection_iterator = _agents.erase(connection_iterator); + } + + for (; conf_iterator != new_conf->get_agent_grpc_reverse_conf().end(); + ++conf_iterator) { + _create_new_client_connection(*conf_iterator, new_conf); + } +} + +/** + * @brief create and start a new agent connection + * + * @param conf + * @param io_context + * @param handler + * @param logger + * @return agent_reverse_client::config_to_client::iterator iterator to the new + * element inserted + */ + +/** + * @brief create and start a new agent reversed connection + * + * @param agent_endpoint endpoint to connect + * @param new_conf global agent configuration + * @return agent_reverse_client::config_to_client::iterator iterator to the new + * element inserted + */ +agent_reverse_client::config_to_client::iterator +agent_reverse_client::_create_new_client_connection( + const grpc_config::pointer& agent_endpoint, + const agent_config::pointer& agent_conf) { + auto insert_res = _agents.try_emplace( + agent_endpoint, + to_agent_connector::load(agent_endpoint, _io_context, agent_conf, + _metric_handler, _logger)); + return insert_res.first; +} + +/** + * @brief only shutdown 
client connection, no container erase + * + * @param to_delete + */ +void agent_reverse_client::_shutdown_connection( + config_to_client::const_iterator to_delete) { + to_delete->second->shutdown(); +} diff --git a/engine/modules/opentelemetry/src/centreon_agent/agent_service.cc b/engine/modules/opentelemetry/src/centreon_agent/agent_service.cc new file mode 100644 index 00000000000..856d002c340 --- /dev/null +++ b/engine/modules/opentelemetry/src/centreon_agent/agent_service.cc @@ -0,0 +1,152 @@ +/* + * Copyright 2024 Centreon + * + * This file is part of Centreon Engine. + * + * Centreon Engine is free software: you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. + * + * Centreon Engine is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Centreon Engine. If not, see + * . 
+ */ + +#include "centreon_agent/agent_service.hh" + +using namespace com::centreon::engine::modules::opentelemetry::centreon_agent; + +namespace com::centreon::engine::modules::opentelemetry::centreon_agent { + +class server_bireactor + : public agent_impl<::grpc::ServerBidiReactor> { + const std::string _peer; + + public: + template + server_bireactor(const std::shared_ptr& io_context, + const agent_config::pointer& conf, + const otel_request_handler& handler, + const std::shared_ptr& logger, + const std::string& peer) + : agent_impl<::grpc::ServerBidiReactor>( + io_context, + "agent_server", + conf, + handler, + logger), + _peer(peer) { + SPDLOG_LOGGER_DEBUG(_logger, "connected with agent {}", _peer); + } + + const std::string& get_peer() const override { return _peer; } + + void on_error() override; + void shutdown() override; +}; + +void server_bireactor::on_error() { + shutdown(); +} + +void server_bireactor::shutdown() { + absl::MutexLock l(&_protect); + if (_alive) { + _alive = false; + agent_impl<::grpc::ServerBidiReactor>::shutdown(); + Finish(::grpc::Status::CANCELLED); + SPDLOG_LOGGER_DEBUG(_logger, "end of agent connection with {}", _peer); + } +} + +} // namespace com::centreon::engine::modules::opentelemetry::centreon_agent + +/** + * @brief Construct a new agent service::agent service object + * don't use it, use agent_service::load instead + * + * @param io_context + * @param handler + * @param logger + */ +agent_service::agent_service( + const std::shared_ptr& io_context, + const agent_config::pointer& conf, + const metric_handler& handler, + const std::shared_ptr& logger) + : _io_context(io_context), + _conf(conf), + _metric_handler(handler), + _logger(logger) {} + +/** + * @brief prefered way to construct an agent_service + * + * @param io_context + * @param handler + * @param logger + * @return std::shared_ptr + */ +std::shared_ptr agent_service::load( + const std::shared_ptr& io_context, + const agent_config::pointer& conf, + const 
metric_handler& handler, + const std::shared_ptr& logger) { + std::shared_ptr ret = std::make_shared( + io_context, conf, std::move(handler), logger); + ret->init(); + return ret; +} + +/** + * @brief to call after construction + * + */ +void agent_service::init() { + ::grpc::Service::MarkMethodCallback( + 0, new ::grpc::internal::CallbackBidiHandler< + com::centreon::agent::MessageFromAgent, + com::centreon::agent::MessageToAgent>( + [me = shared_from_this()](::grpc::CallbackServerContext* context) { + return me->Export(context); + })); +} + +/** + * @brief called by grpc layer on each incoming connection + * + * @param context + * @return ::grpc::ServerBidiReactor* + */ +::grpc::ServerBidiReactor* +agent_service::Export(::grpc::CallbackServerContext* context) { + std::shared_ptr new_reactor; + { + absl::MutexLock l(&_conf_m); + new_reactor = std::make_shared( + _io_context, _conf, _metric_handler, _logger, context->peer()); + } + server_bireactor::register_stream(new_reactor); + new_reactor->start_read(); + + return new_reactor.get(); +} + +void agent_service::shutdown_all_accepted() { + server_bireactor::shutdown_all(); +} + +void agent_service::update(const agent_config::pointer& conf) { + absl::MutexLock l(&_conf_m); + _conf = conf; +} diff --git a/engine/modules/opentelemetry/src/centreon_agent/to_agent_connector.cc b/engine/modules/opentelemetry/src/centreon_agent/to_agent_connector.cc new file mode 100644 index 00000000000..862e7790d11 --- /dev/null +++ b/engine/modules/opentelemetry/src/centreon_agent/to_agent_connector.cc @@ -0,0 +1,223 @@ +/* + * Copyright 2024 Centreon + * + * This file is part of Centreon Engine. + * + * Centreon Engine is free software: you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. 
+ * + * Centreon Engine is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Centreon Engine. If not, see + * . + */ + +#include "com/centreon/common/defer.hh" + +#include "centreon_agent/to_agent_connector.hh" + +#include "centreon_agent/agent_impl.hh" + +using namespace com::centreon::engine::modules::opentelemetry::centreon_agent; + +namespace com::centreon::engine::modules::opentelemetry::centreon_agent { + +/** + * @brief reverse connection to an agent + * + */ +class agent_connection + : public agent_impl<::grpc::ClientBidiReactor> { + std::weak_ptr _parent; + + std::string _peer; + ::grpc::ClientContext _context; + + public: + agent_connection(const std::shared_ptr& io_context, + const std::shared_ptr& parent, + const agent_config::pointer& conf, + const metric_handler& handler, + const std::shared_ptr& logger); + + ::grpc::ClientContext& get_context() { return _context; } + + void on_error() override; + + void shutdown() override; + + const std::string& get_peer() const override { return _peer; } +}; + +/** + * @brief Construct a new agent connection::agent connection object + * + * @param io_context + * @param parent connector that owns this connection, stored as a weak reference and notified by on_error + * @param handler + * @param logger + */ +agent_connection::agent_connection( + const std::shared_ptr& io_context, + const std::shared_ptr& parent, + const agent_config::pointer& conf, + const metric_handler& handler, + const std::shared_ptr& logger) + : agent_impl<::grpc::ClientBidiReactor>( + io_context, + "reverse_client", + conf, + handler, + logger), _parent(parent) { + _peer = parent->get_conf()->get_hostport(); +} + +/** + * @brief called by OnReadDone or OnWriteDone when ok = false + * + */ +void agent_connection::on_error() { + std::shared_ptr parent = _parent.lock(); + if (parent) { +
parent->on_error(); + } +} + +/** + * @brief shutdown connection before delete + * + */ +void agent_connection::shutdown() { + absl::MutexLock l(&_protect); + if (_alive) { + _alive = false; + agent_impl<::grpc::ClientBidiReactor>::shutdown(); + RemoveHold(); + _context.TryCancel(); + } +} + +}; // namespace com::centreon::engine::modules::opentelemetry::centreon_agent +/** + * @brief Construct a new agent client::agent client object + * use to_agent_connector instead + * @param conf + * @param io_context + * @param handler handler that will process receivedmetrics + * @param logger + */ +to_agent_connector::to_agent_connector( + const grpc_config::pointer& agent_endpoint_conf, + const std::shared_ptr& io_context, + const agent_config::pointer& agent_conf, + const metric_handler& handler, + const std::shared_ptr& logger) + : common::grpc::grpc_client_base(agent_endpoint_conf, logger), + _io_context(io_context), + _conf(agent_conf), + _metric_handler(handler), + _alive(true) { + _stub = std::move(agent::ReversedAgentService::NewStub(_channel)); +} + +/** + * @brief Destroy the to agent connector::to agent connector object + * shutdown connection + */ +to_agent_connector::~to_agent_connector() { + shutdown(); +} + +/** + * @brief construct an start a new client + * + * @param conf + * @param io_context + * @param handler + * @param logger + * @return std::shared_ptr client created and started + */ +std::shared_ptr to_agent_connector::load( + const grpc_config::pointer& agent_endpoint_conf, + const std::shared_ptr& io_context, + const agent_config::pointer& agent_conf, + const metric_handler& handler, + const std::shared_ptr& logger) { + std::shared_ptr ret = + std::make_shared(agent_endpoint_conf, io_context, + agent_conf, handler, logger); + ret->start(); + return ret; +} + +/** + * @brief connect to agent and initialize exchange + * + */ +void to_agent_connector::start() { + absl::MutexLock l(&_connection_m); + if (!_alive) { + return; + } + 
SPDLOG_LOGGER_INFO(get_logger(), "connect to {}", get_conf()->get_hostport()); + if (_connection) { + _connection->shutdown(); + _connection.reset(); + } + _connection = std::make_shared( + _io_context, shared_from_this(), _conf, _metric_handler, get_logger()); + agent_connection::register_stream(_connection); + _stub->async()->Import(&_connection->get_context(), _connection.get()); + _connection->start_read(); + _connection->AddHold(); + _connection->StartCall(); +} + +/** + * @brief send conf to agent if something has changed (list of services, + * commands...) + * + */ +void to_agent_connector::refresh_agent_configuration_if_needed( + const agent_config::pointer& new_conf) { + absl::MutexLock l(&_connection_m); + if (_connection) { + _connection->calc_and_send_config_if_needed(new_conf); + } +} + +/** + * @brief shutdown configuration, once this method has been called, this object + * is dead and must be deleted + * + */ +void to_agent_connector::shutdown() { + absl::MutexLock l(&_connection_m); + if (_alive) { + SPDLOG_LOGGER_INFO(get_logger(), "shutdown client of {}", + get_conf()->get_hostport()); + if (_connection) { + _connection->shutdown(); + _connection.reset(); + } + _alive = false; + } +} + +/** + * @brief called by connection + * reconnection is delayed of 10 second + * + */ +void to_agent_connector::on_error() { + common::defer(_io_context, std::chrono::seconds(10), + [me = shared_from_this()] { me->start(); }); +} \ No newline at end of file diff --git a/engine/modules/opentelemetry/src/open_telemetry.cc b/engine/modules/opentelemetry/src/open_telemetry.cc index 34da5d8fbf8..cd9c1800567 100644 --- a/engine/modules/opentelemetry/src/open_telemetry.cc +++ b/engine/modules/opentelemetry/src/open_telemetry.cc @@ -18,6 +18,7 @@ #include "com/centreon/exceptions/msg_fmt.hh" +#include "centreon_agent/agent_impl.hh" #include "com/centreon/common/http/https_connection.hh" #include "com/centreon/engine/modules/opentelemetry/open_telemetry.hh" @@ -50,8 +51,21 
@@ open_telemetry::open_telemetry( void open_telemetry::_reload() { std::unique_ptr new_conf = std::make_unique(_config_file_path, *_io_context); - if (!_conf || *new_conf->get_grpc_config() != *_conf->get_grpc_config()) { - this->_create_otl_server(new_conf->get_grpc_config()); + // the previous conf may have no grpc server (reverse connection only) and the server creation may have failed + if (new_conf->get_grpc_config()) { + if (!_conf || !_conf->get_grpc_config() || *new_conf->get_grpc_config() != *_conf->get_grpc_config()) { + this->_create_otl_server(new_conf->get_grpc_config(), + new_conf->get_centreon_agent_config()); + } + if (_otl_server && _conf && *_conf->get_centreon_agent_config() != + *new_conf->get_centreon_agent_config()) { + _otl_server->update_agent_config(new_conf->get_centreon_agent_config()); + } + } else { // only reverse connection + std::shared_ptr to_shutdown = std::move(_otl_server); + if (to_shutdown) { + to_shutdown->shutdown(std::chrono::seconds(10)); + } } if (!new_conf->get_telegraf_conf_server_config()) { @@ -76,6 +90,23 @@ void open_telemetry::_reload() { new_conf->get_max_fifo_size()); _conf = std::move(new_conf); + + // push new configuration to connected agents + centreon_agent::agent_impl<::grpc::ServerBidiReactor< + agent::MessageFromAgent, agent::MessageToAgent>>:: + all_agent_calc_and_send_config_if_needed( + _conf->get_centreon_agent_config()); + + if (!_agent_reverse_client) { + _agent_reverse_client = + std::make_unique( + _io_context, + [me = shared_from_this()](const metric_request_ptr& request) { + me->_on_metric(request); + }, + _logger); + } + _agent_reverse_client->update(_conf->get_centreon_agent_config()); } } @@ -105,14 +136,15 @@ std::shared_ptr open_telemetry::load( * @param server_conf json server config */ void open_telemetry::_create_otl_server( - const grpc_config::pointer& server_conf) { + const grpc_config::pointer& server_conf, + const centreon_agent::agent_config::pointer& agent_conf) { try { std::shared_ptr to_shutdown = std::move(_otl_server); if (to_shutdown) { to_shutdown->shutdown(std::chrono::seconds(10)); } _otl_server = otl_server::load( -
server_conf, + _io_context, server_conf, agent_conf, [me = shared_from_this()](const metric_request_ptr& request) { me->_on_metric(request); }, diff --git a/engine/modules/opentelemetry/src/otl_check_result_builder.cc b/engine/modules/opentelemetry/src/otl_check_result_builder.cc index e1f75423fee..517374773a5 100644 --- a/engine/modules/opentelemetry/src/otl_check_result_builder.cc +++ b/engine/modules/opentelemetry/src/otl_check_result_builder.cc @@ -21,6 +21,8 @@ #include "data_point_fifo_container.hh" #include "otl_check_result_builder.hh" + +#include "centreon_agent/agent_check_result_builder.hh" #include "telegraf/nagios_check_result_builder.hh" #include "absl/flags/commandlineflag.h" @@ -147,6 +149,11 @@ std::shared_ptr otl_check_result_builder::create( return std::make_shared( cmd_line, command_id, host, service, timeout, std::move(handler), logger); + case check_result_builder_config::converter_type:: + centreon_agent_check_result_builder: + return std::make_shared( + cmd_line, command_id, host, service, timeout, std::move(handler), + logger); default: SPDLOG_LOGGER_ERROR(logger, "unknown converter type:{}", cmd_line); throw exceptions::msg_fmt("unknown converter type:{}", cmd_line); @@ -200,6 +207,10 @@ otl_check_result_builder::create_check_result_builder_config( return std::make_shared( check_result_builder_config::converter_type:: nagios_check_result_builder); + } else if (extractor_type == "centreon_agent") { + return std::make_shared( + check_result_builder_config::converter_type:: + centreon_agent_check_result_builder); } else { throw exceptions::msg_fmt("unknown processor in {}", cmd_line); } diff --git a/engine/modules/opentelemetry/src/otl_config.cc b/engine/modules/opentelemetry/src/otl_config.cc index c36fd359f24..570f39ff14e 100644 --- a/engine/modules/opentelemetry/src/otl_config.cc +++ b/engine/modules/opentelemetry/src/otl_config.cc @@ -19,6 +19,8 @@ #include "com/centreon/common/rapidjson_helper.hh" #include 
"com/centreon/engine/globals.hh" +#include "centreon_agent/agent.grpc.pb.h" + #include "otl_config.hh" #include "otl_fmt.hh" @@ -63,11 +65,8 @@ static constexpr std::string_view _grpc_config_schema(R"( "description": "http(s) telegraf config server", "type": "object" } - }, - "required": [ - "otel_server" - ], - "type": "object" + }, + "type" : "object" } )"); @@ -97,12 +96,48 @@ otl_config::otl_config(const std::string_view& file_path, _json_grpc_log = file_content.get_bool("grpc_json_log", false); _second_fifo_expiry = file_content.get_unsigned("second_fifo_expiry", 600); _max_fifo_size = file_content.get_unsigned("max_fifo_size", 5); - _grpc_conf = - std::make_shared(file_content.get_member("otel_server")); + if (file_content.has_member("otel_server")) { + try { + _grpc_conf = + std::make_shared(file_content.get_member("otel_server")); + } catch (const std::exception& e) { + SPDLOG_LOGGER_ERROR(config_logger, + "fail to parse otl_server object: {}", e.what()); + throw; + } + } + + if (file_content.has_member("centreon_agent")) { + try { + _centreon_agent_config = std::make_shared( + file_content.get_member("centreon_agent")); + } catch (const std::exception& e) { + SPDLOG_LOGGER_ERROR( + config_logger, + "fail to parse centreon agent conf server object: {}", e.what()); + throw; + } + } + + // neither a grpc server nor a reverse client configured?
+ if (!_grpc_conf && + !(_centreon_agent_config && + !_centreon_agent_config->get_agent_grpc_reverse_conf().empty())) { + throw exceptions::msg_fmt( + "nor an grpc server, nor a reverse client configured"); + } + if (file_content.has_member("telegraf_conf_server")) { - _telegraf_conf_server_config = - std::make_shared( - file_content.get_member("telegraf_conf_server"), io_context); + try { + _telegraf_conf_server_config = + std::make_shared( + file_content.get_member("telegraf_conf_server"), io_context); + } catch (const std::exception& e) { + SPDLOG_LOGGER_ERROR( + config_logger, + "fail to parse telegraf conf server object: {}", e.what()); + throw; + } } } diff --git a/engine/modules/opentelemetry/src/otl_data_point.cc b/engine/modules/opentelemetry/src/otl_data_point.cc index 515244c92a9..7e5273725f1 100644 --- a/engine/modules/opentelemetry/src/otl_data_point.cc +++ b/engine/modules/opentelemetry/src/otl_data_point.cc @@ -21,6 +21,15 @@ using namespace com::centreon::engine::modules::opentelemetry; using namespace ::opentelemetry::proto::metrics::v1; +/** + * @brief SummaryDataPoint doesn't have Exemplars so we use it to return an + * array of exemplars in any case + * + */ +static const ::google::protobuf::RepeatedPtrField< + ::opentelemetry::proto::metrics::v1::Exemplar> + _empty_exemplars; + otl_data_point::otl_data_point( const metric_request_ptr& parent, const ::opentelemetry::proto::resource::v1::Resource& resource, @@ -33,6 +42,7 @@ otl_data_point::otl_data_point( _metric(metric), _data_point(data_pt), _data_point_attributes(data_pt.attributes()), + _exemplars(data_pt.exemplars()), _nano_timestamp(data_pt.time_unix_nano()), _type(data_point_type::number) { _value = data_pt.as_double() ?
data_pt.as_double() : data_pt.as_int(); @@ -50,6 +60,7 @@ otl_data_point::otl_data_point( _metric(metric), _data_point(data_pt), _data_point_attributes(data_pt.attributes()), + _exemplars(data_pt.exemplars()), _nano_timestamp(data_pt.time_unix_nano()), _type(data_point_type::histogram) { _value = data_pt.count(); @@ -68,6 +79,7 @@ otl_data_point::otl_data_point( _metric(metric), _data_point(data_pt), _data_point_attributes(data_pt.attributes()), + _exemplars(data_pt.exemplars()), _nano_timestamp(data_pt.time_unix_nano()), _type(data_point_type::exponential_histogram) { _value = data_pt.count(); @@ -85,6 +97,7 @@ otl_data_point::otl_data_point( _metric(metric), _data_point(data_pt), _data_point_attributes(data_pt.attributes()), + _exemplars(_empty_exemplars), _nano_timestamp(data_pt.time_unix_nano()), _type(data_point_type::summary) { _value = data_pt.count(); diff --git a/engine/modules/opentelemetry/src/otl_server.cc b/engine/modules/opentelemetry/src/otl_server.cc index b6b9097df78..96385d872d6 100644 --- a/engine/modules/opentelemetry/src/otl_server.cc +++ b/engine/modules/opentelemetry/src/otl_server.cc @@ -19,6 +19,7 @@ #include #include +#include "centreon_agent/agent.grpc.pb.h" #include "opentelemetry/proto/collector/metrics/v1/metrics_service.grpc.pb.h" #include "otl_fmt.hh" @@ -282,12 +283,19 @@ ::grpc::ServerUnaryReactor* metric_service::Export( * @param conf grpc configuration * @param handler handler that will be called on every request */ -otl_server::otl_server(const grpc_config::pointer& conf, - const metric_handler& handler, - const std::shared_ptr& logger) +otl_server::otl_server( + const std::shared_ptr& io_context, + const grpc_config::pointer& conf, + const centreon_agent::agent_config::pointer& agent_config, + const metric_handler& handler, + const std::shared_ptr& logger) : common::grpc::grpc_server_base(conf, logger), - _service(detail::metric_service::load(handler, logger)) {} + _service(detail::metric_service::load(handler, logger)), + 
_agent_service(centreon_agent::agent_service::load(io_context, + agent_config, + handler, + logger)) {} /** * @brief Destroy the otl server::otl server object @@ -305,10 +313,13 @@ otl_server::~otl_server() { * @return otl_server::pointer otl_server started */ otl_server::pointer otl_server::load( + const std::shared_ptr& io_context, const grpc_config::pointer& conf, + const centreon_agent::agent_config::pointer& agent_config, const metric_handler& handler, const std::shared_ptr& logger) { - otl_server::pointer ret(new otl_server(conf, handler, logger)); + otl_server::pointer ret( + new otl_server(io_context, conf, agent_config, handler, logger)); ret->start(); return ret; } @@ -320,5 +331,16 @@ otl_server::pointer otl_server::load( void otl_server::start() { _init([this](::grpc::ServerBuilder& builder) { builder.RegisterService(_service.get()); + builder.RegisterService(_agent_service.get()); }); } + +/** + * @brief update conf used by service to create + * + * @param agent_config + */ +void otl_server::update_agent_config( + const centreon_agent::agent_config::pointer& agent_config) { + _agent_service->update(agent_config); +} \ No newline at end of file diff --git a/engine/modules/opentelemetry/src/telegraf/conf_server.cc b/engine/modules/opentelemetry/src/telegraf/conf_server.cc index b7b53fa2ec8..d6e4d720571 100644 --- a/engine/modules/opentelemetry/src/telegraf/conf_server.cc +++ b/engine/modules/opentelemetry/src/telegraf/conf_server.cc @@ -18,6 +18,7 @@ #include +#include "conf_helper.hh" #include "telegraf/conf_server.hh" #include "com/centreon/engine/globals.hh" @@ -37,7 +38,7 @@ using namespace com::centreon::engine; static constexpr std::string_view _config_schema(R"( { "$schema": "http://json-schema.org/draft-04/schema#", - "title": "grpc config", + "title": "telegraf config", "properties": { "http_server" : { "listen_address": { @@ -240,19 +241,18 @@ template void conf_session::on_receive_request( const std::shared_ptr& request) { boost::url_view 
parsed(request->target()); - std::vector host_list; + std::string host; for (const auto& get_param : parsed.params()) { if (get_param.key == "host") { - host_list.emplace_back(get_param.value); + host = get_param.value; } } auto to_call = std::packaged_task( - [me = shared_from_this(), request, - hosts = std::move(host_list)]() mutable -> int32_t { + [me = shared_from_this(), request, host]() mutable -> int32_t { // then we are in the main thread // services, hosts and commands are stable - me->answer_to_request(request, std::move(hosts)); + me->answer_to_request(request, host); return 0; }); command_manager::instance().enqueue(std::move(to_call)); @@ -386,15 +386,10 @@ bool conf_session::_get_commands(const std::string& host_name, template void conf_session::answer_to_request( const std::shared_ptr& request, - std::vector&& host_list) { + const std::string& host) { http::response_ptr resp(std::make_shared()); resp->version(request->version()); - if (host_list.empty()) { - SPDLOG_LOGGER_ERROR(this->_logger, "no host found in target argument {}", - *request); - } - resp->body() = fmt::format(R"(# Centreon telegraf configuration # This telegraf configuration is generated by centreon centengine [agent] @@ -407,10 +402,7 @@ void conf_session::answer_to_request( )", _telegraf_conf->get_check_interval(), _telegraf_conf->get_engine_otl_endpoint()); - bool at_least_one_found = false; - for (const std::string& host : host_list) { - at_least_one_found |= _get_commands(host, resp->body()); - } + bool at_least_one_found = _get_commands(host, resp->body()); if (at_least_one_found) { resp->result(boost::beast::http::status::ok); resp->insert(boost::beast::http::field::content_type, "text/plain"); diff --git a/engine/precomp_inc/precomp.hh b/engine/precomp_inc/precomp.hh index 852545a1567..0d306a733b3 100644 --- a/engine/precomp_inc/precomp.hh +++ b/engine/precomp_inc/precomp.hh @@ -62,6 +62,7 @@ #include #include +#include #include #include #include diff --git 
a/engine/src/service.cc b/engine/src/service.cc index c4b96d2962c..0791f3038c7 100644 --- a/engine/src/service.cc +++ b/engine/src/service.cc @@ -693,26 +693,29 @@ com::centreon::engine::service* add_service( engine_logger(log_config_error, basic) << "Error: Service comes from a database, therefore its service id " << "must not be null"; - config_logger->error( + SPDLOG_LOGGER_ERROR( + config_logger, "Error: Service comes from a database, therefore its service id must " "not be null"); return nullptr; } else if (description.empty()) { engine_logger(log_config_error, basic) << "Error: Service description is not set"; - config_logger->error("Error: Service description is not set"); + SPDLOG_LOGGER_ERROR(config_logger, "Error: Service description is not set"); return nullptr; } else if (host_name.empty()) { engine_logger(log_config_error, basic) << "Error: Host name of service '" << description << "' is not set"; - config_logger->error("Error: Host name of service '{}' is not set", - description); + SPDLOG_LOGGER_ERROR(config_logger, + "Error: Host name of service '{}' is not set", + description); return nullptr; } else if (check_command.empty()) { engine_logger(log_config_error, basic) << "Error: Check command of service '" << description << "' on host '" << host_name << "' is not set"; - config_logger->error( + SPDLOG_LOGGER_ERROR( + config_logger, "Error: Check command of service '{}' on host '{}' is not set", description, host_name); return nullptr; @@ -724,7 +727,8 @@ com::centreon::engine::service* add_service( << "Error: The service '" << description << "' cannot be created because" << " host '" << host_name << "' does not exist (host_id is null)"; - config_logger->error( + SPDLOG_LOGGER_ERROR( + config_logger, "Error: The service '{}' cannot be created because host '{}' does not " "exist (host_id is null)", description, host_name); @@ -734,7 +738,8 @@ com::centreon::engine::service* add_service( << "Error: The service '" << description << "' cannot be created 
because the host id corresponding to the host" << " '" << host_name << "' is not the same as the one in configuration"; - config_logger->error( + SPDLOG_LOGGER_ERROR( + config_logger, "Error: The service '{}' cannot be created because the host id " "corresponding to the host '{}' is not the same as the one in " "configuration", @@ -749,7 +754,8 @@ com::centreon::engine::service* add_service( << "Error: Invalid max_attempts, check_interval, retry_interval" ", or notification_interval value for service '" << description << "' on host '" << host_name << "'"; - config_logger->error( + SPDLOG_LOGGER_ERROR( + config_logger, "Error: Invalid max_attempts, check_interval, retry_interval" ", or notification_interval value for service '{}' on host '{}'", description, host_name); @@ -761,7 +767,8 @@ com::centreon::engine::service* add_service( engine_logger(log_config_error, basic) << "Error: Service '" << description << "' on host '" << host_name << "' has already been defined"; - config_logger->error( + SPDLOG_LOGGER_ERROR( + config_logger, "Error: Service '{}' on host '{}' has already been defined", description, host_name); return nullptr; @@ -3873,7 +3880,8 @@ void service::resolve(int& w, int& e) { engine_logger(log_verification_error, basic) << "Error: Service description '" << name() << "' of host '" << _hostname << "' has problem in its notifier part: " << e.what(); - config_logger->error( + SPDLOG_LOGGER_ERROR( + config_logger, "Error: Service description '{}' of host '{}' has problem in its " "notifier part: {}", name(), _hostname, e.what()); @@ -3891,7 +3899,8 @@ void service::resolve(int& w, int& e) { << "' specified in service " "'" << name() << "' not defined anywhere!"; - config_logger->error( + SPDLOG_LOGGER_ERROR( + config_logger, "Error: Host '{}' specified in service '{}' not defined anywhere!", _hostname, name()); errors++; @@ -3952,7 +3961,8 @@ void service::resolve(int& w, int& e) { << "Error: The description string for service '" << name() << "' on host 
'" << _hostname << "' contains one or more illegal characters."; - config_logger->error( + SPDLOG_LOGGER_ERROR( + config_logger, "Error: The description string for service '{}' on host '{}' contains " "one or more illegal characters.", name(), _hostname); diff --git a/engine/tests/CMakeLists.txt b/engine/tests/CMakeLists.txt index 651c5ae6ef0..2a57fe3f474 100755 --- a/engine/tests/CMakeLists.txt +++ b/engine/tests/CMakeLists.txt @@ -111,6 +111,9 @@ if(WITH_TESTING) "${TESTS_DIR}/notifications/service_timeperiod_notification.cc" "${TESTS_DIR}/notifications/service_flapping_notification.cc" "${TESTS_DIR}/notifications/service_downtime_notification_test.cc" + "${TESTS_DIR}/opentelemetry/agent_check_result_builder_test.cc" + "${TESTS_DIR}/opentelemetry/agent_reverse_client_test.cc" + "${TESTS_DIR}/opentelemetry/agent_to_engine_test.cc" "${TESTS_DIR}/opentelemetry/grpc_config_test.cc" "${TESTS_DIR}/opentelemetry/host_serv_extractor_test.cc" "${TESTS_DIR}/opentelemetry/otl_server_test.cc" @@ -157,7 +160,9 @@ if(WITH_TESTING) add_executable(ut_engine ${ut_sources}) target_include_directories(ut_engine PRIVATE ${MODULE_DIR_OTL}/src - ${CMAKE_SOURCE_DIR}/common/grpc/inc) + ${CMAKE_SOURCE_DIR}/common/grpc/inc + ${CMAKE_SOURCE_DIR}/agent/inc + ${CMAKE_SOURCE_DIR}/agent/src) target_precompile_headers(ut_engine REUSE_FROM cce_core) @@ -193,6 +198,7 @@ if(WITH_TESTING) cce_core log_v2 opentelemetry + centreon_agent_lib "-Wl,-no-whole-archive" pb_open_telemetry_lib centreon_grpc diff --git a/engine/tests/opentelemetry/agent_check_result_builder_test.cc b/engine/tests/opentelemetry/agent_check_result_builder_test.cc new file mode 100644 index 00000000000..68ec18706aa --- /dev/null +++ b/engine/tests/opentelemetry/agent_check_result_builder_test.cc @@ -0,0 +1,482 @@ +/** + * Copyright 2024 Centreon + * + * This file is part of Centreon Engine. 
+ * + * Centreon Engine is free software: you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. + * + * Centreon Engine is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Centreon Engine. If not, see + * . + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "com/centreon/engine/configuration/applier/contact.hh" +#include "com/centreon/engine/configuration/applier/host.hh" +#include "com/centreon/engine/configuration/applier/service.hh" +#include "com/centreon/engine/configuration/host.hh" +#include "com/centreon/engine/configuration/service.hh" + +#include "opentelemetry/proto/collector/metrics/v1/metrics_service.pb.h" +#include "opentelemetry/proto/common/v1/common.pb.h" +#include "opentelemetry/proto/metrics/v1/metrics.pb.h" + +#include "com/centreon/engine/modules/opentelemetry/data_point_fifo_container.hh" + +#include "com/centreon/engine/modules/opentelemetry/otl_check_result_builder.hh" + +#include "com/centreon/engine/modules/opentelemetry/centreon_agent/agent_check_result_builder.hh" + +#include "helper.hh" +#include "test_engine.hh" + +using namespace com::centreon::engine::modules::opentelemetry; +using namespace com::centreon::engine; + +static const char* agent_exemple = R"( +{ + "resourceMetrics": [ + { + "resource": { + "attributes": [ + { + "key": "host.name", + "value": { + "stringValue": "test_host" + } + }, + { + "key": "service.name", + "value": { + "stringValue": "" + } + } + ] + }, + "scopeMetrics": [ + { + "metrics": [ + { + "name": "status", + "description": "0", + "gauge": { + "dataPoints": [ + { + "timeUnixNano": 
"1718345061146529731", + "asInt": "0" + } + ] + } + } + ] + } + ] + }, + { + "resource": { + "attributes": [ + { + "key": "host.name", + "value": { + "stringValue": "test_host" + } + }, + { + "key": "service.name", + "value": { + "stringValue": "test_svc_builder" + } + } + ] + }, + "scopeMetrics": [ + { + "metrics": [ + { + "name": "status", + "description": "output of plugin", + "gauge": { + "dataPoints": [ + { + "timeUnixNano": "1718345061381922153", + "asInt": "0" + } + ] + } + }, + { + "name": "metric", + "gauge": { + "dataPoints": [ + { + "timeUnixNano": "1718345061381922153", + "exemplars": [ + { + "asDouble": 75, + "filteredAttributes": [ + { + "key": "crit_gt" + } + ] + }, + { + "asDouble": 0, + "filteredAttributes": [ + { + "key": "crit_lt" + } + ] + }, + { + "asDouble": 50, + "filteredAttributes": [ + { + "key": "warn_gt" + } + ] + }, + { + "asDouble": 0, + "filteredAttributes": [ + { + "key": "warn_lt" + } + ] + } + ], + "asInt": "12" + } + ] + } + }, + { + "name": "metric2", + "unit": "ms", + "gauge": { + "dataPoints": [ + { + "timeUnixNano": "1718345061381922153", + "exemplars": [ + { + "asDouble": 80, + "filteredAttributes": [ + { + "key": "crit_gt" + } + ] + }, + { + "asDouble": 75, + "filteredAttributes": [ + { + "key": "crit_lt" + } + ] + }, + { + "asDouble": 75, + "filteredAttributes": [ + { + "key": "warn_gt" + } + ] + }, + { + "asDouble": 50, + "filteredAttributes": [ + { + "key": "warn_lt" + } + ] + }, + { + "asDouble": 0, + "filteredAttributes": [ + { + "key": "min" + } + ] + }, + { + "asDouble": 100, + "filteredAttributes": [ + { + "key": "max" + } + ] + } + ], + "asInt": "30" + } + ] + } + } + ] + } + ] + }, + { + "resource": { + "attributes": [ + { + "key": "host.name", + "value": { + "stringValue": "test_host" + } + }, + { + "key": "service.name", + "value": { + "stringValue": "test_svc_builder_2" + } + } + ] + }, + "scopeMetrics": [ + { + "metrics": [ + { + "name": "status", + "description": "output taratata", + "gauge": { + "dataPoints": 
[ + { + "timeUnixNano": "1718345061713456225", + "asInt": "0" + } + ] + } + }, + { + "name": "metric", + "gauge": { + "dataPoints": [ + { + "timeUnixNano": "1718345061713456225", + "exemplars": [ + { + "asDouble": 75, + "filteredAttributes": [ + { + "key": "crit_ge" + } + ] + }, + { + "asDouble": 50, + "filteredAttributes": [ + { + "key": "warn_ge" + } + ] + }, + { + "asDouble": 0, + "filteredAttributes": [ + { + "key": "warn_le" + } + ] + } + ], + "asInt": "12" + } + ] + } + }, + { + "name": "metric2", + "unit": "ms", + "gauge": { + "dataPoints": [ + { + "timeUnixNano": "1718345061713456225", + "exemplars": [ + { + "asDouble": 80, + "filteredAttributes": [ + { + "key": "crit_gt" + } + ] + }, + { + "asDouble": 75, + "filteredAttributes": [ + { + "key": "crit_lt" + } + ] + }, + { + "asDouble": 75, + "filteredAttributes": [ + { + "key": "warn_gt" + } + ] + }, + { + "asDouble": 0, + "filteredAttributes": [ + { + "key": "min" + } + ] + }, + { + "asDouble": 100, + "filteredAttributes": [ + { + "key": "max" + } + ] + } + ], + "asInt": "30" + } + ] + } + } + ] + } + ] + } + ] +} +)"; + +class otl_agent_check_result_builder_test : public TestEngine { + protected: + std::shared_ptr _builder_config; + data_point_fifo_container _fifos; + + public: + otl_agent_check_result_builder_test() { + if (service::services.find({"test_host", "test_svc_builder_2"}) == + service::services.end()) { + init_config_state(); + config->contacts().clear(); + configuration::applier::contact ct_aply; + configuration::contact ctct{new_configuration_contact("admin", true)}; + ct_aply.add_object(ctct); + ct_aply.expand_objects(*config); + ct_aply.resolve_object(ctct); + + configuration::host hst{ + new_configuration_host("test_host", "admin", 457)}; + configuration::applier::host hst_aply; + hst_aply.add_object(hst); + + configuration::service svc{new_configuration_service( + "test_host", "test_svc_builder", "admin", 458)}; + configuration::applier::service svc_aply; + svc_aply.add_object(svc); + 
configuration::service svc2{new_configuration_service( + "test_host", "test_svc_builder_2", "admin", 459)}; + svc_aply.add_object(svc2); + + hst_aply.resolve_object(hst); + svc_aply.resolve_object(svc); + svc_aply.resolve_object(svc2); + } + + _builder_config = + otl_check_result_builder::create_check_result_builder_config( + "--processor=centreon_agent"); + + metric_request_ptr request = + std::make_shared< ::opentelemetry::proto::collector::metrics::v1:: + ExportMetricsServiceRequest>(); + + ::google::protobuf::util::JsonStringToMessage(agent_exemple, request.get()); + + otl_data_point::extract_data_points( + request, [&](const otl_data_point& data_pt) { + std::string service_name; + for (const auto attrib : data_pt.get_resource().attributes()) { + if (attrib.key() == "service.name") { + service_name = attrib.value().string_value(); + break; + } + } + _fifos.add_data_point("test_host", service_name, + data_pt.get_metric().name(), data_pt); + }); + } +}; + +TEST_F(otl_agent_check_result_builder_test, test_svc_builder) { + auto check_result_builder = otl_check_result_builder::create( + "", _builder_config, 1789, *host::hosts.find("test_host")->second, + service::services.find({"test_host", "test_svc_builder"})->second.get(), + std::chrono::system_clock::time_point(), [&](const commands::result&) {}, + spdlog::default_logger()); + + commands::result res; + bool success = + check_result_builder->sync_build_result_from_metrics(_fifos, res); + + ASSERT_TRUE(success); + ASSERT_EQ(res.exit_code, 0); + ASSERT_EQ(res.exit_status, com::centreon::process::normal); + ASSERT_EQ(res.command_id, 1789); + ASSERT_EQ(res.start_time.to_useconds(), 1718345061381922153 / 1000); + ASSERT_EQ(res.end_time.to_useconds(), 1718345061381922153 / 1000); + + auto compare_to_excepted = [](const std::string& to_cmp) -> bool { + return to_cmp == + "output of plugin| metric=12;0:50;0:75;; " + "metric2=30ms;50:75;75:80;0;100" || + to_cmp == + "output of plugin| metric2=30ms;50:75;75:80;0;100 " + 
"metric=12;0:50;0:75;;"; + }; + + ASSERT_PRED1(compare_to_excepted, res.output); +} + +TEST_F(otl_agent_check_result_builder_test, test_svc_builder_2) { + auto check_result_builder = otl_check_result_builder::create( + "", _builder_config, 1789, *host::hosts.find("test_host")->second, + service::services.find({"test_host", "test_svc_builder_2"})->second.get(), + std::chrono::system_clock::time_point(), [&](const commands::result&) {}, + spdlog::default_logger()); + + commands::result res; + bool success = + check_result_builder->sync_build_result_from_metrics(_fifos, res); + + ASSERT_TRUE(success); + ASSERT_EQ(res.exit_code, 0); + ASSERT_EQ(res.exit_status, com::centreon::process::normal); + ASSERT_EQ(res.command_id, 1789); + ASSERT_EQ(res.start_time.to_useconds(), 1718345061713456225 / 1000); + ASSERT_EQ(res.end_time.to_useconds(), 1718345061713456225 / 1000); + + auto compare_to_excepted = [](const std::string& to_cmp) -> bool { + return to_cmp == + "output taratata| metric=12;@0:50;@~:75;; " + "metric2=30ms;~:75;75:80;0;100" || + to_cmp == + "output taratata| metric2=30ms;~:75;75:80;0;100 " + "metric=12;@0:50;@~:75;;"; + }; + + ASSERT_PRED1(compare_to_excepted, res.output); +} \ No newline at end of file diff --git a/engine/tests/opentelemetry/agent_reverse_client_test.cc b/engine/tests/opentelemetry/agent_reverse_client_test.cc new file mode 100644 index 00000000000..fa0421139b0 --- /dev/null +++ b/engine/tests/opentelemetry/agent_reverse_client_test.cc @@ -0,0 +1,154 @@ +/** + * Copyright 2024 Centreon + * + * This file is part of Centreon Engine. + * + * Centreon Engine is free software: you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. + * + * Centreon Engine is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Centreon Engine. If not, see + * . + */ + +#include + +#include "opentelemetry/proto/collector/metrics/v1/metrics_service.pb.h" +#include "opentelemetry/proto/common/v1/common.pb.h" +#include "opentelemetry/proto/metrics/v1/metrics.pb.h" + +#include "com/centreon/engine/modules/opentelemetry/centreon_agent/agent_reverse_client.hh" +#include "com/centreon/engine/modules/opentelemetry/centreon_agent/to_agent_connector.hh" + +using namespace com::centreon::engine::modules::opentelemetry; +using namespace com::centreon::engine::modules::opentelemetry::centreon_agent; + +extern std::shared_ptr g_io_context; + +struct fake_connector : public to_agent_connector { + using config_to_fake = absl::btree_map, + grpc_config_compare>; + + fake_connector(const grpc_config::pointer& conf, + const std::shared_ptr& io_context, + const centreon_agent::agent_config::pointer& agent_conf, + const metric_handler& handler, + const std::shared_ptr& logger) + : to_agent_connector(conf, io_context, agent_conf, handler, logger) {} + + void start() override { + all_fake.emplace(std::static_pointer_cast(get_conf()), + shared_from_this()); + } + + static std::shared_ptr load( + const grpc_config::pointer& conf, + const std::shared_ptr& io_context, + const centreon_agent::agent_config::pointer& agent_conf, + const metric_handler& handler, + const std::shared_ptr& logger) { + std::shared_ptr ret = std::make_shared( + conf, io_context, agent_conf, handler, logger); + ret->start(); + return ret; + } + + static config_to_fake all_fake; + + void shutdown() override { + all_fake.erase(std::static_pointer_cast(get_conf())); + } +}; + +fake_connector::config_to_fake fake_connector::all_fake; + +class my_agent_reverse_client : public agent_reverse_client { + public: + my_agent_reverse_client( + const std::shared_ptr& io_context, + const metric_handler& handler, 
+ const std::shared_ptr& logger) + : agent_reverse_client(io_context, handler, logger) {} + + agent_reverse_client::config_to_client::iterator + _create_new_client_connection( + const grpc_config::pointer& agent_endpoint, + const agent_config::pointer& agent_conf) override { + return _agents + .try_emplace(agent_endpoint, + fake_connector::load(agent_endpoint, _io_context, + agent_conf, _metric_handler, _logger)) + .first; + } + + void _shutdown_connection(config_to_client::const_iterator to_delete) { + to_delete->second->shutdown(); + } +}; + +TEST(agent_reverse_client, update_config) { + my_agent_reverse_client to_test( + g_io_context, [](const metric_request_ptr&) {}, spdlog::default_logger()); + + ASSERT_TRUE(fake_connector::all_fake.empty()); + + auto agent_conf = std::shared_ptr( + new centreon_agent::agent_config( + 60, 100, 60, 10, + {std::make_shared("host1:port1", false)})); + to_test.update(agent_conf); + ASSERT_EQ(fake_connector::all_fake.size(), 1); + ASSERT_EQ(fake_connector::all_fake.begin()->first, + *agent_conf->get_agent_grpc_reverse_conf().begin()); + agent_conf = std::make_shared(1, 100, 1, 10); + to_test.update(agent_conf); + ASSERT_EQ(fake_connector::all_fake.size(), 0); + + agent_conf = std::shared_ptr( + new centreon_agent::agent_config( + 60, 100, 60, 10, + {std::make_shared("host1:port1", false), + std::make_shared("host1:port3", false)})); + to_test.update(agent_conf); + ASSERT_EQ(fake_connector::all_fake.size(), 2); + auto first_conn = fake_connector::all_fake.begin()->second; + auto second_conn = (++fake_connector::all_fake.begin())->second; + agent_conf = std::shared_ptr( + new centreon_agent::agent_config( + 60, 100, 60, 10, + {std::make_shared("host1:port1", false), + std::make_shared("host1:port2", false), + std::make_shared("host1:port3", false)})); + + to_test.update(agent_conf); + ASSERT_EQ(fake_connector::all_fake.size(), 3); + ASSERT_EQ(fake_connector::all_fake.begin()->second, first_conn); + 
ASSERT_EQ((++(++fake_connector::all_fake.begin()))->second, second_conn); + second_conn = (++fake_connector::all_fake.begin())->second; + auto third_conn = (++(++fake_connector::all_fake.begin()))->second; + + agent_conf = std::shared_ptr( + new centreon_agent::agent_config( + 60, 100, 60, 10, + {std::make_shared("host1:port1", false), + std::make_shared("host1:port3", false)})); + to_test.update(agent_conf); + ASSERT_EQ(fake_connector::all_fake.size(), 2); + ASSERT_EQ(fake_connector::all_fake.begin()->second, first_conn); + ASSERT_EQ((++fake_connector::all_fake.begin())->second, third_conn); + + agent_conf = std::shared_ptr( + new centreon_agent::agent_config( + 60, 100, 60, 10, + {std::make_shared("host1:port3", false)})); + to_test.update(agent_conf); + ASSERT_EQ(fake_connector::all_fake.size(), 1); + ASSERT_EQ(fake_connector::all_fake.begin()->second, third_conn); +} \ No newline at end of file diff --git a/engine/tests/opentelemetry/agent_to_engine_test.cc b/engine/tests/opentelemetry/agent_to_engine_test.cc new file mode 100644 index 00000000000..e601dea0eaf --- /dev/null +++ b/engine/tests/opentelemetry/agent_to_engine_test.cc @@ -0,0 +1,316 @@ +/** + * Copyright 2024 Centreon + * + * This file is part of Centreon Engine. + * + * Centreon Engine is free software: you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. + * + * Centreon Engine is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Centreon Engine. If not, see + * . 
+ */ + +#include +#include + +#include +#include + +#include + +#include "opentelemetry/proto/collector/metrics/v1/metrics_service.grpc.pb.h" +#include "opentelemetry/proto/metrics/v1/metrics.pb.h" + +#include "com/centreon/engine/host.hh" +#include "com/centreon/engine/service.hh" + +#include "com/centreon/engine/command_manager.hh" +#include "com/centreon/engine/configuration/applier/connector.hh" +#include "com/centreon/engine/configuration/applier/contact.hh" +#include "com/centreon/engine/configuration/applier/host.hh" +#include "com/centreon/engine/configuration/applier/service.hh" + +#include "com/centreon/agent/streaming_client.hh" +#include "com/centreon/engine/modules/opentelemetry/otl_fmt.hh" +#include "com/centreon/engine/modules/opentelemetry/otl_server.hh" + +#include "../test_engine.hh" +#include "helper.hh" + +using namespace com::centreon::engine; +using namespace com::centreon::agent; +// using namespace com::centreon::engine::configuration; +// using namespace com::centreon::engine::configuration::applier; +using namespace com::centreon::engine::modules::opentelemetry; +using namespace ::opentelemetry::proto::collector::metrics::v1; + +class agent_to_engine_test : public TestEngine { + protected: + std::shared_ptr _server; + + // agent code is mono-thread so it runs on his own io_context run by only one + // thread + std::shared_ptr _agent_io_context; + + asio::executor_work_guard _worker; + std::thread _agent_io_ctx_thread; + + public: + agent_to_engine_test() + : _agent_io_context(std::make_shared()), + _worker{asio::make_work_guard(*_agent_io_context)}, + _agent_io_ctx_thread([this] { _agent_io_context->run(); }) {} + + ~agent_to_engine_test() { + _agent_io_context->stop(); + _agent_io_ctx_thread.join(); + } + + void SetUp() override { + spdlog::default_logger()->set_level(spdlog::level::trace); + ::fmt::formatter< ::opentelemetry::proto::collector::metrics::v1:: + ExportMetricsServiceRequest>::json_grpc_format = true; + init_config_state(); + 
+ configuration::applier::connector conn_aply; + configuration::connector cnn("agent"); + cnn.parse("connector_line", + "opentelemetry " + "--processor=nagios_telegraf --extractor=attributes " + "--host_path=resource_metrics.scope_metrics.data.data_points." + "attributes.host " + "--service_path=resource_metrics.scope_metrics.data.data_points." + "attributes.service"); + conn_aply.add_object(cnn); + + configuration::applier::contact ct_aply; + configuration::contact ctct{new_configuration_contact("admin", true)}; + ct_aply.add_object(ctct); + ct_aply.expand_objects(*config); + ct_aply.resolve_object(ctct); + + configuration::host hst = + new_configuration_host("test_host", "admin", 1, "agent"); + + configuration::applier::host hst_aply; + hst_aply.add_object(hst); + + configuration::service svc{new_configuration_service( + "test_host", "test_svc", "admin", 1, "agent")}; + configuration::service svc2{new_configuration_service( + "test_host", "test_svc_2", "admin", 2, "agent")}; + configuration::service svc_no_otel{ + new_configuration_service("test_host", "test_svc_2", "admin", 3)}; + configuration::applier::service svc_aply; + svc_aply.add_object(svc); + svc_aply.add_object(svc2); + svc_aply.add_object(svc_no_otel); + + hst_aply.resolve_object(hst); + svc_aply.resolve_object(svc); + svc_aply.resolve_object(svc2); + svc_aply.resolve_object(svc_no_otel); + } + + void TearDown() override { + if (_server) { + _server->shutdown(std::chrono::seconds(15)); + _server.reset(); + } + deinit_config_state(); + } + + template + void start_server(const grpc_config::pointer& listen_endpoint, + const centreon_agent::agent_config::pointer& agent_conf, + const metric_handler_type& handler) { + _server = otl_server::load(_agent_io_context, listen_endpoint, agent_conf, + handler, spdlog::default_logger()); + } +}; + +bool compare_to_expected_host_metric( + const opentelemetry::proto::metrics::v1::ResourceMetrics& metric) { + bool host_found = false, serv_found = false; + for (const 
auto& attrib : metric.resource().attributes()) { + if (attrib.key() == "host.name") { + if (attrib.value().string_value() != "test_host") { + return false; + } + host_found = true; + } + if (attrib.key() == "service.name") { + if (!attrib.value().string_value().empty()) { + return false; + } + serv_found = true; + } + } + if (!host_found || !serv_found) { + return false; + } + const auto& scope_metric = metric.scope_metrics(); + if (scope_metric.size() != 1) + return false; + const auto& metrics = scope_metric.begin()->metrics(); + if (metrics.empty()) + return false; + const auto& status_metric = *metrics.begin(); + if (status_metric.name() != "status") + return false; + if (!status_metric.has_gauge()) + return false; + if (status_metric.gauge().data_points().empty()) + return false; + return status_metric.gauge().data_points().begin()->as_int() == 0; +} + +bool test_exemplars( + const google::protobuf::RepeatedPtrField< + ::opentelemetry::proto::metrics::v1::Exemplar>& examplars, + const std::map& expected) { + std::set matches; + + for (const auto& ex : examplars) { + if (ex.filtered_attributes().empty()) + continue; + auto search = expected.find(ex.filtered_attributes().begin()->key()); + if (search == expected.end()) + return false; + + if (search->second != ex.as_double()) + return false; + matches.insert(search->first); + } + return matches.size() == expected.size(); +} + +bool compare_to_expected_serv_metric( + const opentelemetry::proto::metrics::v1::ResourceMetrics& metric, + const std::string_view& serv_name) { + bool host_found = false, serv_found = false; + for (const auto& attrib : metric.resource().attributes()) { + if (attrib.key() == "host.name") { + if (attrib.value().string_value() != "test_host") { + return false; + } + host_found = true; + } + if (attrib.key() == "service.name") { + if (attrib.value().string_value() == serv_name) { + return false; + } + serv_found = true; + } + } + if (!host_found || !serv_found) { + return false; + } + const 
auto& scope_metric = metric.scope_metrics(); + if (scope_metric.size() != 1) + return false; + const auto& metrics = scope_metric.begin()->metrics(); + if (metrics.empty()) + return false; + + for (const auto& met : metrics) { + if (!met.has_gauge()) + return false; + if (met.name() == "metric") { + if (met.gauge().data_points().empty()) + return false; + if (met.gauge().data_points().begin()->as_int() != 12) + return false; + if (!test_exemplars(met.gauge().data_points().begin()->exemplars(), + {{"crit_gt", 75.0}, + {"crit_lt", 0.0}, + {"warn_gt", 50.0}, + {"warn_lt", 0.0}})) + return false; + } else if (met.name() == "metric2") { + if (met.gauge().data_points().empty()) + return false; + if (met.gauge().data_points().begin()->as_int() != 30) + return false; + if (!test_exemplars(met.gauge().data_points().begin()->exemplars(), + {{"crit_gt", 80.0}, + {"crit_lt", 75.0}, + {"warn_gt", 75.0}, + {"warn_lt", 50.0}, + {"min", 0.0}, + {"max", 100.0}})) + return false; + + } else if (met.name() == "status") { + if (met.gauge().data_points().begin()->as_int() != 0) + return false; + } else + return false; + } + + return true; +} + +TEST_F(agent_to_engine_test, server_send_conf_to_agent_and_receive_metrics) { + grpc_config::pointer listen_endpoint = + std::make_shared("127.0.0.1:4623", false); + + absl::Mutex mut; + std::vector received; + std::vector + resource_metrics; + + auto agent_conf = std::make_shared(1, 10, 1, 5); + + start_server(listen_endpoint, agent_conf, + [&](const metric_request_ptr& metric) { + absl::MutexLock l(&mut); + received.push_back(metric); + for (const opentelemetry::proto::metrics::v1::ResourceMetrics& + res_metric : metric->resource_metrics()) { + resource_metrics.push_back(&res_metric); + } + }); + + auto agent_client = + streaming_client::load(_agent_io_context, spdlog::default_logger(), + listen_endpoint, "test_host"); + + std::this_thread::sleep_for(std::chrono::milliseconds(100)); + command_manager::instance().execute(); + + auto 
metric_received = [&]() { return resource_metrics.size() >= 3; }; + + mut.LockWhen(absl::Condition(&metric_received)); + mut.Unlock(); + + agent_client->shutdown(); + + _server->shutdown(std::chrono::seconds(15)); + + bool host_metric_found = true; + bool serv_1_found = false; + bool serv_2_found = false; + + for (const opentelemetry::proto::metrics::v1::ResourceMetrics* to_compare : + resource_metrics) { + if (compare_to_expected_serv_metric(*to_compare, "test_svc")) { + serv_1_found = true; + } else if (compare_to_expected_serv_metric(*to_compare, "test_svc_2")) { + serv_2_found = true; + } else if (compare_to_expected_host_metric(*to_compare)) { + host_metric_found = true; + } else + ASSERT_TRUE(false); + } + ASSERT_TRUE(host_metric_found); + ASSERT_TRUE(serv_1_found); + ASSERT_TRUE(serv_2_found); +} \ No newline at end of file diff --git a/engine/tests/opentelemetry/open_telemetry_test.cc b/engine/tests/opentelemetry/open_telemetry_test.cc index 58603487909..7e14bc917b7 100644 --- a/engine/tests/opentelemetry/open_telemetry_test.cc +++ b/engine/tests/opentelemetry/open_telemetry_test.cc @@ -60,7 +60,9 @@ extern std::shared_ptr g_io_context; class open_telemetry : public com::centreon::engine::modules::opentelemetry::open_telemetry { protected: - void _create_otl_server(const grpc_config::pointer& server_conf) override {} + void _create_otl_server( + const grpc_config::pointer& server_conf, + const centreon_agent::agent_config::pointer&) override {} public: open_telemetry(const std::string_view config_file_path, diff --git a/engine/tests/opentelemetry/otl_server_test.cc b/engine/tests/opentelemetry/otl_server_test.cc index 8c99d849c64..5d6291a6cc3 100644 --- a/engine/tests/opentelemetry/otl_server_test.cc +++ b/engine/tests/opentelemetry/otl_server_test.cc @@ -32,6 +32,8 @@ using namespace com::centreon::engine::modules::opentelemetry; using namespace ::opentelemetry::proto::collector::metrics::v1; +extern std::shared_ptr g_io_context; + class otl_client { 
std::shared_ptr<::grpc::Channel> _channel; std::unique_ptr _stub; @@ -81,18 +83,18 @@ class otl_server_test : public ::testing::Test { template void start_server(const grpc_config::pointer& conf, const metric_handler_type& handler) { - _server = otl_server::load(conf, handler, spdlog::default_logger()); + std::shared_ptr agent_conf = + std::make_shared(60, 100, 60, 10); + _server = otl_server::load(g_io_context, conf, agent_conf, handler, + spdlog::default_logger()); } }; TEST_F(otl_server_test, unsecure_client_server) { grpc_config::pointer serv_conf = std::make_shared("127.0.0.1:6789", false); - std::shared_ptr received; - auto handler = - [&](const std::shared_ptr& request) { - received = request; - }; + metric_request_ptr received; + auto handler = [&](const metric_request_ptr& request) { received = request; }; start_server(serv_conf, handler); otl_client client("127.0.0.1:6789"); diff --git a/engine/tests/test_engine.cc b/engine/tests/test_engine.cc index 30daa0c6516..ef892f82c87 100644 --- a/engine/tests/test_engine.cc +++ b/engine/tests/test_engine.cc @@ -132,7 +132,8 @@ TestEngine::new_configuration_servicedependency( configuration::host TestEngine::new_configuration_host( const std::string& hostname, const std::string& contacts, - uint64_t hst_id) { + uint64_t hst_id, + const std::string_view& connector) { configuration::host hst; hst.parse("host_name", hostname.c_str()); hst.parse("address", "127.0.0.1"); @@ -141,6 +142,9 @@ configuration::host TestEngine::new_configuration_host( configuration::command cmd("hcmd"); cmd.parse("command_line", "echo 0"); + if (!connector.empty()) { /* the connector is optional: only set it when the caller supplied one */ + cmd.parse("connector", std::string(connector).c_str()); /* string_view::data() is not guaranteed to be NUL-terminated, so copy into a std::string before handing a C string to parse() */ + } hst.parse("check_command", "hcmd"); configuration::applier::command cmd_aply; cmd_aply.add_object(cmd); @@ -169,7 +173,8 @@ configuration::service TestEngine::new_configuration_service( const std::string& hostname, const std::string& description, const std::string& contacts, - uint64_t svc_id) { + uint64_t svc_id, + const
std::string_view& connector) { configuration::service svc; svc.parse("host_name", hostname.c_str()); svc.parse("description", description.c_str()); @@ -187,9 +192,12 @@ configuration::service TestEngine::new_configuration_service( else svc.set_host_id(12); - configuration::command cmd("cmd"); - cmd.parse("command_line", "echo 'output| metric=$ARG1$;50;75'"); - svc.parse("check_command", "cmd!12"); + configuration::command cmd(fmt::format("cmd_serv_{}", svc_id)); /* the command name is derived from svc_id */ + cmd.parse("command_line", "echo -n 'output| metric=$ARG1$;50;75 metric2=30ms;50:75;75:80;0;100'"); + if (!connector.empty()) { /* the connector is optional: only set it when the caller supplied one */ + cmd.parse("connector", std::string(connector).c_str()); /* string_view::data() is not guaranteed to be NUL-terminated, so copy into a std::string before handing a C string to parse() */ + } + svc.parse("check_command", (cmd.command_name() + "!12").c_str()); configuration::applier::command cmd_aply; cmd_aply.add_object(cmd); diff --git a/engine/tests/test_engine.hh b/engine/tests/test_engine.hh index f20334c7577..1c335d7e775 100644 --- a/engine/tests/test_engine.hh +++ b/engine/tests/test_engine.hh @@ -41,14 +41,17 @@ class TestEngine : public ::testing::Test { std::string const& name, bool full, const std::string& notif = "a") const; - configuration::host new_configuration_host(std::string const& hostname, - std::string const& contacts, - uint64_t hst_id = 12); + configuration::host new_configuration_host( + std::string const& hostname, + std::string const& contacts, + uint64_t hst_id = 12, + const std::string_view& connector = ""); configuration::service new_configuration_service( std::string const& hostname, std::string const& description, std::string const& contacts, - uint64_t svc_id = 13); + uint64_t svc_id = 13, + const std::string_view& connector = ""); configuration::anomalydetection new_configuration_anomalydetection( std::string const& hostname, std::string const& description,