From 5707547193379a40bf64ff105127950ad5e89fa8 Mon Sep 17 00:00:00 2001 From: Kai <33246768+KayzzzZ@users.noreply.github.com> Date: Mon, 2 Dec 2024 16:29:16 +0800 Subject: [PATCH 1/6] fix: eBPFServer UT memory leak (#1937) --- core/unittest/ebpf/CMakeLists.txt | 6 +++--- core/unittest/ebpf/eBPFServerUnittest.cpp | 13 +++++-------- 2 files changed, 8 insertions(+), 11 deletions(-) diff --git a/core/unittest/ebpf/CMakeLists.txt b/core/unittest/ebpf/CMakeLists.txt index 1a600a28b4..0c5e43b71d 100644 --- a/core/unittest/ebpf/CMakeLists.txt +++ b/core/unittest/ebpf/CMakeLists.txt @@ -1,10 +1,10 @@ cmake_minimum_required(VERSION 3.22) project(ebpf_unittest) -# add_executable(ebpf_server_unittest eBPFServerUnittest.cpp) -# target_link_libraries(ebpf_server_unittest ${UT_BASE_TARGET}) +add_executable(ebpf_server_unittest eBPFServerUnittest.cpp) +target_link_libraries(ebpf_server_unittest ${UT_BASE_TARGET}) include(GoogleTest) -# gtest_discover_tests(ebpf_server_unittest) +gtest_discover_tests(ebpf_server_unittest) diff --git a/core/unittest/ebpf/eBPFServerUnittest.cpp b/core/unittest/ebpf/eBPFServerUnittest.cpp index a8c1763b91..9dd6f31d04 100644 --- a/core/unittest/ebpf/eBPFServerUnittest.cpp +++ b/core/unittest/ebpf/eBPFServerUnittest.cpp @@ -73,6 +73,7 @@ class eBPFServerUnittest : public testing::Test { config_->mProfileProbeConfig.mProfileUploadDuration = 10; config_->mProcessProbeConfig.mEnableOOMDetect = false; } + void TearDown() override { delete config_; } private: template void setJSON(Json::Value& v, const std::string& key, const T& value) { @@ -500,7 +501,6 @@ void eBPFServerUnittest::HandleStats(nami::NamiStatisticsHandler cb, int plus) { stats.emplace_back(std::move(networkSecurityStat)); stats.emplace_back(std::move(processSecurityStat)); stats.emplace_back(std::move(fileSecurityStat)); - std::cout << "3" << std::endl; if (cb) cb(stats); } @@ -601,18 +601,16 @@ void eBPFServerUnittest::TestEnableNetworkPlugin() { bool res = ebpf::InitObserverNetworkOption(configJson, network_option, &ctx, "test"); EXPECT_TRUE(res); // observer_options.Init(ObserverType::NETWORK, configJson, &ctx, "test"); - auto input = new InputNetworkObserver(); + std::shared_ptr input(new InputNetworkObserver()); input->SetContext(ctx); input->SetMetricsRecordRef("test", "1"); auto initStatus = input->Init(configJson, optionalGoPipeline); EXPECT_TRUE(initStatus); - std::cout << "1" << std::endl; res = ebpf::eBPFServer::GetInstance()->EnablePlugin( "test", 1, nami::PluginType::NETWORK_OBSERVE, &ctx, &network_option, input->mPluginMgr); - std::cout << "2" << std::endl; EXPECT_EQ(ebpf::eBPFServer::GetInstance()->mMonitorMgr->mInited[int(nami::PluginType::NETWORK_OBSERVE)], true); auto& mgr = ebpf::eBPFServer::GetInstance()->mMonitorMgr->mSelfMonitors[int(nami::PluginType::NETWORK_OBSERVE)]; @@ -627,7 +625,6 @@ void eBPFServerUnittest::TestEnableNetworkPlugin() { EXPECT_TRUE(res); auto conf = ebpf::eBPFServer::GetInstance()->mSourceManager->mConfig.get(); HandleStats(conf->stats_handler_, 1); - std::cout << "3" << std::endl; auto network_conf = std::get(conf->config_); EXPECT_EQ(conf->plugin_type_, nami::PluginType::NETWORK_OBSERVE); EXPECT_EQ(conf->type, UpdataType::SECURE_UPDATE_TYPE_ENABLE_PROBE); @@ -695,7 +692,7 @@ void eBPFServerUnittest::TestEnableProcessPlugin() { APSARA_TEST_TRUE(ParseJsonTable(configStr, configJson, errorMsg)); SecurityOptions security_options; security_options.Init(SecurityProbeType::PROCESS, configJson, &ctx, "input_process_security"); - auto input = new InputProcessSecurity(); + std::shared_ptr input(new InputProcessSecurity()); input->SetContext(ctx); input->SetMetricsRecordRef("test", "1"); input->Init(configJson, optionalGoPipeline); @@ -753,7 +750,7 @@ void eBPFServerUnittest::TestEnableNetworkSecurePlugin() { } } )"; - auto input = new InputNetworkSecurity(); + std::shared_ptr input(new InputNetworkSecurity()); input->SetContext(ctx); input->SetMetricsRecordRef("test", "1"); @@ -824,7 +821,7 @@ void eBPFServerUnittest::TestEnableFileSecurePlugin() { } )"; - auto input = new InputFileSecurity(); + std::shared_ptr input(new InputFileSecurity()); input->SetContext(ctx); input->SetMetricsRecordRef("test", "1"); From 729a6943b7cd589171fe23174163db110393e9cd Mon Sep 17 00:00:00 2001 From: henryzhx8 Date: Tue, 3 Dec 2024 09:32:42 +0800 Subject: [PATCH 2/6] improve sls authentication logic (#1894) --- core/common/SafeQueue.h | 4 + core/monitor/profile_sender/ProfileSender.cpp | 7 +- core/pipeline/limiter/ConcurrencyLimiter.cpp | 43 +++-- core/pipeline/limiter/ConcurrencyLimiter.h | 33 ++-- core/pipeline/plugin/interface/HttpFlusher.h | 2 +- core/plugin/flusher/sls/DiskBufferWriter.cpp | 84 +++++---- core/plugin/flusher/sls/DiskBufferWriter.h | 4 +- core/plugin/flusher/sls/FlusherSLS.cpp | 115 +++++------- core/plugin/flusher/sls/FlusherSLS.h | 2 +- core/plugin/flusher/sls/SLSClientManager.cpp | 171 +++++++++++++++--- core/plugin/flusher/sls/SLSClientManager.h | 25 ++- core/runner/FlusherRunner.cpp | 22 ++- core/runner/sink/http/HttpSink.cpp | 16 +- core/sdk/Client.cpp | 159 +++++----------- core/sdk/Client.h | 45 +---- core/sls_control/SLSControl.cpp | 168 ----------------- core/sls_control/SLSControl.h | 49 ----- core/unittest/flusher/CMakeLists.txt | 8 + .../pipeline/ConcurrencyLimiterUnittest.cpp | 11 +- core/unittest/plugin/PluginMock.h | 13 +- .../queue/SenderQueueManagerUnittest.cpp | 6 +- core/unittest/queue/SenderQueueUnittest.cpp | 4 +- core/unittest/sdk/SDKCommonUnittest.cpp | 2 +- .../unittest/sender/FlusherRunnerUnittest.cpp | 70 +++++++ 24 files changed, 503 insertions(+), 560 deletions(-) delete mode 100644 core/sls_control/SLSControl.cpp delete mode 100644 core/sls_control/SLSControl.h diff --git a/core/common/SafeQueue.h b/core/common/SafeQueue.h index 632fb9b0f0..9b0d11dde1 100644 --- a/core/common/SafeQueue.h +++ b/core/common/SafeQueue.h @@ -74,6 +74,10 @@ class SafeQueue { return mQueue.size(); } +#ifdef APSARA_UNIT_TEST_MAIN + void Clear() { std::queue().swap(mQueue); } +#endif + private: std::queue mQueue; mutable std::mutex mMux; diff --git a/core/monitor/profile_sender/ProfileSender.cpp b/core/monitor/profile_sender/ProfileSender.cpp index f89667496c..d899cbae00 100644 --- a/core/monitor/profile_sender/ProfileSender.cpp +++ b/core/monitor/profile_sender/ProfileSender.cpp @@ -26,7 +26,8 @@ #include "EnterpriseProfileSender.h" #endif #include "sdk/Exception.h" -#include "sls_control/SLSControl.h" +#include "plugin/flusher/sls/SLSClientManager.h" +#include "app_config/AppConfig.h" // TODO: temporarily used #include "common/compression/CompressorFactory.h" @@ -165,8 +166,8 @@ void ProfileSender::SendRunningStatus(sls_logs::LogGroup& logGroup) { } logtailStatus["__logs__"][0] = status; string logBody = logtailStatus.toStyledString(); - sdk::Client client(endpoint, "", "", INT32_FLAG(sls_client_send_timeout), "", ""); - SLSControl::GetInstance()->SetSlsSendClientCommonParam(&client); + sdk::Client client("", endpoint, INT32_FLAG(sls_client_send_timeout)); + client.SetPort(AppConfig::GetInstance()->GetDataServerPort()); try { string res; if (!CompressLz4(logBody, res)) { diff --git a/core/pipeline/limiter/ConcurrencyLimiter.cpp b/core/pipeline/limiter/ConcurrencyLimiter.cpp index 1fdb45ec7b..ac75c8de27 100644 --- a/core/pipeline/limiter/ConcurrencyLimiter.cpp +++ b/core/pipeline/limiter/ConcurrencyLimiter.cpp @@ -14,21 +14,24 @@ #include "pipeline/limiter/ConcurrencyLimiter.h" +#include "common/StringTools.h" +#include "logger/Logger.h" + using namespace std; namespace logtail { #ifdef APSARA_UNIT_TEST_MAIN -uint32_t ConcurrencyLimiter::GetCurrentLimit() const { +uint32_t ConcurrencyLimiter::GetCurrentLimit() const { lock_guard lock(mLimiterMux); - return mCurrenctConcurrency; + return mCurrenctConcurrency; } -uint32_t ConcurrencyLimiter::GetCurrentInterval() const { +uint32_t ConcurrencyLimiter::GetCurrentInterval() const { lock_guard lock(mLimiterMux); - return mRetryIntervalSecs; + return mRetryIntervalSecs; } -void ConcurrencyLimiter::SetCurrentLimit(uint32_t limit) { +void ConcurrencyLimiter::SetCurrentLimit(uint32_t limit) { lock_guard lock(mLimiterMux); mCurrenctConcurrency = limit; } @@ -36,11 +39,11 @@ void ConcurrencyLimiter::SetCurrentLimit(uint32_t limit) { void ConcurrencyLimiter::SetInSendingCount(uint32_t count) { mInSendingCnt.store(count); } -uint32_t ConcurrencyLimiter::GetInSendingCount() const { return mInSendingCnt.load(); } - +uint32_t ConcurrencyLimiter::GetInSendingCount() const { + return mInSendingCnt.load(); +} #endif - bool ConcurrencyLimiter::IsValidToPop() { lock_guard lock(mLimiterMux); if (mCurrenctConcurrency == 0) { @@ -54,7 +57,7 @@ bool ConcurrencyLimiter::IsValidToPop() { } if (mCurrenctConcurrency > mInSendingCnt.load()) { return true; - } + } return false; } @@ -67,22 +70,38 @@ void ConcurrencyLimiter::OnSendDone() { } void ConcurrencyLimiter::OnSuccess() { - lock_guard lock(mLimiterMux); + lock_guard lock(mLimiterMux); if (mCurrenctConcurrency <= 0) { mRetryIntervalSecs = mMinRetryIntervalSecs; - } + LOG_INFO(sLogger, ("reset send retry interval, type", mDescription)); + } if (mCurrenctConcurrency != mMaxConcurrency) { ++mCurrenctConcurrency; + if (mCurrenctConcurrency == mMaxConcurrency) { + LOG_INFO(sLogger, + ("increase send concurrency to maximum, type", mDescription)("concurrency", mCurrenctConcurrency)); + } else { + LOG_DEBUG(sLogger, + ("increase send concurrency, type", + mDescription)("from", mCurrenctConcurrency - 1)("to", mCurrenctConcurrency)); + } } } void ConcurrencyLimiter::OnFail() { lock_guard lock(mLimiterMux); if (mCurrenctConcurrency != 0) { + auto old = mCurrenctConcurrency; mCurrenctConcurrency = static_cast(mCurrenctConcurrency * mConcurrencyDownRatio); + LOG_INFO(sLogger, ("decrease send concurrency, type", mDescription)("from", old)("to", mCurrenctConcurrency)); } else { if (mRetryIntervalSecs != mMaxRetryIntervalSecs) { - mRetryIntervalSecs = min(mMaxRetryIntervalSecs, static_cast(mRetryIntervalSecs * mRetryIntervalUpRatio)); + auto old = mRetryIntervalSecs; + mRetryIntervalSecs + = min(mMaxRetryIntervalSecs, static_cast(mRetryIntervalSecs * mRetryIntervalUpRatio)); + LOG_INFO(sLogger, + ("increase send retry interval, type", + mDescription)("from", ToString(old) + "s")("to", ToString(mRetryIntervalSecs) + "s")); } } } diff --git a/core/pipeline/limiter/ConcurrencyLimiter.h b/core/pipeline/limiter/ConcurrencyLimiter.h index 6a260aad7c..1191326b04 100644 --- a/core/pipeline/limiter/ConcurrencyLimiter.h +++ b/core/pipeline/limiter/ConcurrencyLimiter.h @@ -17,10 +17,10 @@ #pragma once #include -#include +#include #include +#include #include -#include #include "monitor/metric_constants/MetricConstants.h" @@ -28,11 +28,20 @@ namespace logtail { class ConcurrencyLimiter { public: - ConcurrencyLimiter(uint32_t maxConcurrency, uint32_t maxRetryIntervalSecs = 3600, - uint32_t minRetryIntervalSecs = 30, double retryIntervalUpRatio = 1.5, double concurrencyDownRatio = 0.5) : - mMaxConcurrency(maxConcurrency), mCurrenctConcurrency(maxConcurrency), - mMaxRetryIntervalSecs(maxRetryIntervalSecs), mMinRetryIntervalSecs(minRetryIntervalSecs), - mRetryIntervalSecs(minRetryIntervalSecs), mRetryIntervalUpRatio(retryIntervalUpRatio), mConcurrencyDownRatio(concurrencyDownRatio) {} + ConcurrencyLimiter(const std::string& description, + uint32_t maxConcurrency, + uint32_t maxRetryIntervalSecs = 3600, + uint32_t minRetryIntervalSecs = 30, + double retryIntervalUpRatio = 1.5, + double concurrencyDownRatio = 0.5) + : mDescription(description), + mMaxConcurrency(maxConcurrency), + mCurrenctConcurrency(maxConcurrency), + mMaxRetryIntervalSecs(maxRetryIntervalSecs), + mMinRetryIntervalSecs(minRetryIntervalSecs), + mRetryIntervalSecs(minRetryIntervalSecs), + mRetryIntervalUpRatio(retryIntervalUpRatio), + mConcurrencyDownRatio(concurrencyDownRatio) {} bool IsValidToPop(); void PostPop(); @@ -43,12 +52,12 @@ class ConcurrencyLimiter { static std::string GetLimiterMetricName(const std::string& limiter) { if (limiter == "region") { - return METRIC_COMPONENT_QUEUE_FETCH_REJECTED_BY_REGION_LIMITER_TIMES_TOTAL; + return METRIC_COMPONENT_QUEUE_FETCH_REJECTED_BY_REGION_LIMITER_TIMES_TOTAL; } else if (limiter == "project") { - return METRIC_COMPONENT_QUEUE_FETCH_REJECTED_BY_PROJECT_LIMITER_TIMES_TOTAL; + return METRIC_COMPONENT_QUEUE_FETCH_REJECTED_BY_PROJECT_LIMITER_TIMES_TOTAL; } else if (limiter == "logstore") { - return METRIC_COMPONENT_QUEUE_FETCH_REJECTED_BY_LOGSTORE_LIMITER_TIMES_TOTAL; - } + return METRIC_COMPONENT_QUEUE_FETCH_REJECTED_BY_LOGSTORE_LIMITER_TIMES_TOTAL; + } return limiter; } @@ -63,6 +72,8 @@ class ConcurrencyLimiter { #endif private: + const std::string mDescription; + std::atomic_uint32_t mInSendingCnt = 0U; uint32_t mMaxConcurrency = 0; diff --git a/core/pipeline/plugin/interface/HttpFlusher.h b/core/pipeline/plugin/interface/HttpFlusher.h index 27b2bde2ff..a8bba21296 100644 --- a/core/pipeline/plugin/interface/HttpFlusher.h +++ b/core/pipeline/plugin/interface/HttpFlusher.h @@ -27,7 +27,7 @@ class HttpFlusher : public Flusher { public: virtual ~HttpFlusher() = default; - virtual std::unique_ptr BuildRequest(SenderQueueItem* item) const = 0; + virtual bool BuildRequest(SenderQueueItem* item, std::unique_ptr& req, bool* keepItem) const = 0; virtual void OnSendDone(const HttpResponse& response, SenderQueueItem* item) = 0; virtual SinkType GetSinkType() override { return SinkType::HTTP; } diff --git a/core/plugin/flusher/sls/DiskBufferWriter.cpp b/core/plugin/flusher/sls/DiskBufferWriter.cpp index dd60ba7211..2d8e6e22f4 100644 --- a/core/plugin/flusher/sls/DiskBufferWriter.cpp +++ b/core/plugin/flusher/sls/DiskBufferWriter.cpp @@ -32,7 +32,6 @@ #include "protobuf/sls/sls_logs.pb.h" #include "provider/Provider.h" #include "sdk/Exception.h" -#include "sls_control/SLSControl.h" DEFINE_FLAG_INT32(write_secondary_wait_timeout, "interval of dump seconary buffer from memory to file, seconds", 2); DEFINE_FLAG_INT32(buffer_file_alive_interval, "the max alive time of a bufferfile, 5 minutes", 300); @@ -41,6 +40,9 @@ DEFINE_FLAG_INT32(quota_exceed_wait_interval, "when daemon buffer thread get quo DEFINE_FLAG_INT32(secondary_buffer_count_limit, "data ready for write buffer file", 20); DEFINE_FLAG_INT32(send_retry_sleep_interval, "sleep microseconds when sync send fail, 50ms", 50000); DEFINE_FLAG_INT32(buffer_check_period, "check logtail local storage buffer period", 60); +DEFINE_FLAG_INT32(unauthorized_wait_interval, "", 1); + +DECLARE_FLAG_INT32(discard_send_fail_interval); using namespace std; @@ -482,7 +484,7 @@ void DiskBufferWriter::SendEncryptionBuffer(const std::string& filename, int32_t SendResult res = SendBufferFileData(bufferMeta, logData, errorCode); if (res == SEND_OK) sendResult = true; - else if (res == SEND_DISCARD_ERROR || res == SEND_UNAUTHORIZED) { + else if (res == SEND_DISCARD_ERROR || res == SEND_PARAMETER_INVALID) { AlarmManager::GetInstance()->SendAlarm(SEND_DATA_FAIL_ALARM, string("send buffer file fail, rawsize:") + ToString(bufferMeta.rawsize()) @@ -492,8 +494,7 @@ void DiskBufferWriter::SendEncryptionBuffer(const std::string& filename, int32_t ""); sendResult = true; discardCount++; - } else if (res == SEND_QUOTA_EXCEED && INT32_FLAG(quota_exceed_wait_interval) > 0) - sleep(INT32_FLAG(quota_exceed_wait_interval)); + } } } delete[] des; @@ -715,27 +716,19 @@ SendResult DiskBufferWriter::SendBufferFileData(const sls_logs::LogtailBufferMet if (endpoint.empty()) sendRes = SEND_NETWORK_ERROR; else { - sendRes = SendToNetSync(sendClient, bufferMeta, logData, errorCode); - } - if (sendRes == SEND_NETWORK_ERROR) { - SLSClientManager::GetInstance()->UpdateEndpointStatus(region, endpoint, false); - SLSClientManager::GetInstance()->ResetClientEndpoint(bufferMeta.aliuid(), region, time(NULL)); - LOG_DEBUG(sLogger, - ("SendBufferFileData", - "SEND_NETWORK_ERROR")("region", region)("aliuid", bufferMeta.aliuid())("endpoint", endpoint)); - } else if (sendRes == SEND_UNAUTHORIZED) { - int32_t lastUpdateTime; - if (SLSControl::GetInstance()->SetSlsSendClientAuth(bufferMeta.aliuid(), false, sendClient, lastUpdateTime)) - sendRes = SendToNetSync(sendClient, bufferMeta, logData, errorCode); + sendRes = SendToNetSync(sendClient, region, endpoint, bufferMeta, logData, errorCode); } return sendRes; } SendResult DiskBufferWriter::SendToNetSync(sdk::Client* sendClient, + const std::string& region, + const std::string& endpoint, const sls_logs::LogtailBufferMeta& bufferMeta, const std::string& logData, std::string& errorCode) { int32_t retryTimes = 0; + time_t beginTime = time(NULL); while (true) { ++retryTimes; try { @@ -775,30 +768,55 @@ SendResult DiskBufferWriter::SendToNetSync(sdk::Client* sendClient, } catch (sdk::LOGException& ex) { errorCode = ex.GetErrorCode(); SendResult sendRes = ConvertErrorCode(errorCode); - if (sendRes == SEND_DISCARD_ERROR || sendRes == SEND_UNAUTHORIZED || sendRes == SEND_QUOTA_EXCEED - || retryTimes >= INT32_FLAG(send_retrytimes)) { - if (sendRes == SEND_QUOTA_EXCEED) + bool hasAuthError = false; + switch (sendRes) { + case SEND_NETWORK_ERROR: + case SEND_SERVER_ERROR: + SLSClientManager::GetInstance()->UpdateEndpointStatus(region, endpoint, false); + SLSClientManager::GetInstance()->ResetClientEndpoint(bufferMeta.aliuid(), region, time(NULL)); + LOG_WARNING(sLogger, + ("send data to SLS fail", "retry later")("error_code", errorCode)( + "error_message", ex.GetMessage())("endpoint", sendClient->GetRawSlsHost())( + "projectName", bufferMeta.project())("logstore", bufferMeta.logstore())( + "RetryTimes", retryTimes)("rawsize", bufferMeta.rawsize())); + usleep(INT32_FLAG(send_retry_sleep_interval)); + break; + case SEND_QUOTA_EXCEED: AlarmManager::GetInstance()->SendAlarm(SEND_QUOTA_EXCEED_ALARM, "error_code: " + errorCode + ", error_message: " + ex.GetMessage(), bufferMeta.project(), bufferMeta.logstore(), ""); - // no region - if (!GetProfileSender()->IsProfileData("", bufferMeta.project(), bufferMeta.logstore())) - LOG_ERROR(sLogger, - ("send data to SLS fail, error_code", errorCode)("error_message", ex.GetMessage())( - "endpoint", sendClient->GetRawSlsHost())("projectName", bufferMeta.project())( - "logstore", bufferMeta.logstore())("RetryTimes", retryTimes)("rawsize", - bufferMeta.rawsize())); + // no region + if (!GetProfileSender()->IsProfileData("", bufferMeta.project(), bufferMeta.logstore())) + LOG_WARNING(sLogger, + ("send data to SLS fail, error_code", errorCode)("error_message", ex.GetMessage())( + "endpoint", sendClient->GetRawSlsHost())("projectName", bufferMeta.project())( + "logstore", bufferMeta.logstore())("RetryTimes", retryTimes)( + "rawsize", bufferMeta.rawsize())); + usleep(INT32_FLAG(quota_exceed_wait_interval)); + break; + case SEND_UNAUTHORIZED: + hasAuthError = true; + usleep(INT32_FLAG(unauthorized_wait_interval)); + break; + default: + break; + } + SLSClientManager::GetInstance()->UpdateAccessKeyStatus(bufferMeta.aliuid(), !hasAuthError); + if (time(nullptr) - beginTime >= INT32_FLAG(discard_send_fail_interval)) { + sendRes = SEND_DISCARD_ERROR; + } + if (sendRes != SEND_NETWORK_ERROR && sendRes != SEND_SERVER_ERROR && sendRes != SEND_QUOTA_EXCEED + && sendRes != SEND_UNAUTHORIZED) { return sendRes; - } else { - LOG_DEBUG( - sLogger, - ("send data to SLS fail", "retry later")("error_code", errorCode)("error_message", ex.GetMessage())( - "endpoint", sendClient->GetRawSlsHost())("projectName", bufferMeta.project())( - "logstore", bufferMeta.logstore())("RetryTimes", retryTimes)("rawsize", bufferMeta.rawsize())); - usleep(INT32_FLAG(send_retry_sleep_interval)); + } + { + lock_guard lock(mBufferSenderThreadRunningMux); + if (!mIsSendBufferThreadRunning) { + return sendRes; + } } } catch (...) { if (retryTimes >= INT32_FLAG(send_retrytimes)) { diff --git a/core/plugin/flusher/sls/DiskBufferWriter.h b/core/plugin/flusher/sls/DiskBufferWriter.h index 14d3d9cfe5..0ed9367c5e 100644 --- a/core/plugin/flusher/sls/DiskBufferWriter.h +++ b/core/plugin/flusher/sls/DiskBufferWriter.h @@ -24,9 +24,9 @@ #include #include "common/SafeQueue.h" +#include "pipeline/queue/SenderQueueItem.h" #include "plugin/flusher/sls/SendResult.h" #include "protobuf/sls/logtail_buffer_meta.pb.h" -#include "pipeline/queue/SenderQueueItem.h" #include "sdk/Client.h" namespace logtail { @@ -65,6 +65,8 @@ class DiskBufferWriter { void BufferSenderThread(); SendResult SendToNetSync(sdk::Client* sendClient, + const std::string& region, + const std::string& endpoint, const sls_logs::LogtailBufferMeta& bufferMeta, const std::string& logData, std::string& errorCode); diff --git a/core/plugin/flusher/sls/FlusherSLS.cpp b/core/plugin/flusher/sls/FlusherSLS.cpp index 7a518bd734..3fcdb6fa6b 100644 --- a/core/plugin/flusher/sls/FlusherSLS.cpp +++ b/core/plugin/flusher/sls/FlusherSLS.cpp @@ -14,11 +14,6 @@ #include "plugin/flusher/sls/FlusherSLS.h" -#include "sls_logs.pb.h" - -#ifdef __ENTERPRISE__ -#include "config/provider/EnterpriseConfigProvider.h" -#endif #include "app_config/AppConfig.h" #include "common/EndpointUtil.h" #include "common/HashUtil.h" @@ -26,6 +21,10 @@ #include "common/ParamExtractor.h" #include "common/TimeUtil.h" #include "common/compression/CompressorFactory.h" +#include "sls_logs.pb.h" +#ifdef __ENTERPRISE__ +#include "config/provider/EnterpriseConfigProvider.h" +#endif #include "pipeline/Pipeline.h" #include "pipeline/batch/FlushStrategy.h" #include "pipeline/queue/QueueKeyManager.h" @@ -38,7 +37,6 @@ #include "provider/Provider.h" #include "runner/FlusherRunner.h" #include "sdk/Common.h" -#include "sls_control/SLSControl.h" // TODO: temporarily used here #include "pipeline/PipelineManager.h" #include "plugin/flusher/sls/DiskBufferWriter.h" @@ -83,9 +81,7 @@ static const char* GetOperationString(OperationOnFail op) { } static OperationOnFail DefaultOperation(uint32_t retryTimes) { - if (retryTimes == 1) { - return OperationOnFail::RETRY_IMMEDIATELY; - } else if (retryTimes > static_cast(INT32_FLAG(unknow_error_try_max))) { + if (retryTimes > static_cast(INT32_FLAG(unknow_error_try_max))) { return OperationOnFail::DISCARD; } else { return OperationOnFail::RETRY_LATER; @@ -95,7 +91,6 @@ static OperationOnFail DefaultOperation(uint32_t retryTimes) { void FlusherSLS::InitResource() { #ifndef APSARA_UNIT_TEST_MAIN if (!sIsResourceInited) { - SLSControl::GetInstance()->Init(); SLSClientManager::GetInstance()->Init(); DiskBufferWriter::GetInstance()->Init(); sIsResourceInited = true; @@ -117,9 +112,8 @@ unordered_map> FlusherSLS::sProjectConcurre unordered_map> FlusherSLS::sRegionConcurrencyLimiterMap; unordered_map> FlusherSLS::sLogstoreConcurrencyLimiterMap; - -shared_ptr GetConcurrencyLimiter() { - return make_shared(AppConfig::GetInstance()->GetSendRequestConcurrency()); +shared_ptr GetConcurrencyLimiter(const std::string& description) { + return make_shared(description, AppConfig::GetInstance()->GetSendRequestConcurrency()); } shared_ptr FlusherSLS::GetLogstoreConcurrencyLimiter(const std::string& project, @@ -129,12 +123,12 @@ shared_ptr FlusherSLS::GetLogstoreConcurrencyLimiter(const s auto iter = sLogstoreConcurrencyLimiterMap.find(key); if (iter == sLogstoreConcurrencyLimiterMap.end()) { - auto limiter = GetConcurrencyLimiter(); + auto limiter = GetConcurrencyLimiter(sName + "#quota#logstore#" + key); sLogstoreConcurrencyLimiterMap.try_emplace(key, limiter); return limiter; } if (iter->second.expired()) { - auto limiter = GetConcurrencyLimiter(); + auto limiter = GetConcurrencyLimiter(sName + "#quota#logstore#" + key); iter->second = limiter; return limiter; } @@ -145,12 +139,12 @@ shared_ptr FlusherSLS::GetProjectConcurrencyLimiter(const st lock_guard lock(sMux); auto iter = sProjectConcurrencyLimiterMap.find(project); if (iter == sProjectConcurrencyLimiterMap.end()) { - auto limiter = GetConcurrencyLimiter(); + auto limiter = GetConcurrencyLimiter(sName + "#quota#project#" + project); sProjectConcurrencyLimiterMap.try_emplace(project, limiter); return limiter; } if (iter->second.expired()) { - auto limiter = GetConcurrencyLimiter(); + auto limiter = GetConcurrencyLimiter(sName + "#quota#project#" + project); iter->second = limiter; return limiter; } @@ -161,12 +155,12 @@ shared_ptr FlusherSLS::GetRegionConcurrencyLimiter(const str lock_guard lock(sMux); auto iter = sRegionConcurrencyLimiterMap.find(region); if (iter == sRegionConcurrencyLimiterMap.end()) { - auto limiter = GetConcurrencyLimiter(); + auto limiter = GetConcurrencyLimiter(sName + "#network#region#" + region); sRegionConcurrencyLimiterMap.try_emplace(region, limiter); return limiter; } if (iter->second.expired()) { - auto limiter = GetConcurrencyLimiter(); + auto limiter = GetConcurrencyLimiter(sName + "#network#region#" + region); iter->second = limiter; return limiter; } @@ -583,12 +577,12 @@ bool FlusherSLS::FlushAll() { return SerializeAndPush(std::move(res)); } -unique_ptr FlusherSLS::BuildRequest(SenderQueueItem* item) const { +bool FlusherSLS::BuildRequest(SenderQueueItem* item, unique_ptr& req, bool* keepItem) const { auto data = static_cast(item); - static int32_t lastResetEndpointTime = 0; sdk::Client* sendClient = SLSClientManager::GetInstance()->GetClient(mRegion, mAliuid); - int32_t curTime = time(NULL); + int32_t curTime = time(NULL); + static int32_t lastResetEndpointTime = 0; data->mCurrentEndpoint = sendClient->GetRawSlsHost(); if (data->mCurrentEndpoint.empty()) { if (curTime - lastResetEndpointTime >= 30) { @@ -609,41 +603,46 @@ unique_ptr FlusherSLS::BuildRequest(SenderQueueItem* item) cons if (data->mType == RawDataType::EVENT_GROUP) { if (mTelemetryType == sls_logs::SLS_TELEMETRY_TYPE_METRICS) { - return sendClient->CreatePostMetricStoreLogsRequest( + req = sendClient->CreatePostMetricStoreLogsRequest( mProject, data->mLogstore, ConvertCompressType(GetCompressType()), data->mData, data->mRawSize, item); } else { if (data->mShardHashKey.empty()) { - return sendClient->CreatePostLogStoreLogsRequest(mProject, - data->mLogstore, - ConvertCompressType(GetCompressType()), - data->mData, - data->mRawSize, - item); + req = sendClient->CreatePostLogStoreLogsRequest(mProject, + data->mLogstore, + ConvertCompressType(GetCompressType()), + data->mData, + data->mRawSize, + item); } else { auto& exactlyOnceCpt = data->mExactlyOnceCheckpoint; int64_t hashKeySeqID = exactlyOnceCpt ? exactlyOnceCpt->data.sequence_id() : sdk::kInvalidHashKeySeqID; - return sendClient->CreatePostLogStoreLogsRequest(mProject, - data->mLogstore, - ConvertCompressType(GetCompressType()), - data->mData, - data->mRawSize, - item, - data->mShardHashKey, - hashKeySeqID); + req = sendClient->CreatePostLogStoreLogsRequest(mProject, + data->mLogstore, + ConvertCompressType(GetCompressType()), + data->mData, + data->mRawSize, + item, + data->mShardHashKey, + hashKeySeqID); } } } else { if (data->mShardHashKey.empty()) - return sendClient->CreatePostLogStoreLogPackageListRequest( + req = sendClient->CreatePostLogStoreLogPackageListRequest( mProject, data->mLogstore, ConvertCompressType(GetCompressType()), data->mData, item); else - return sendClient->CreatePostLogStoreLogPackageListRequest(mProject, - data->mLogstore, - ConvertCompressType(GetCompressType()), - data->mData, - item, - data->mShardHashKey); + req = sendClient->CreatePostLogStoreLogPackageListRequest(mProject, + data->mLogstore, + ConvertCompressType(GetCompressType()), + data->mData, + item, + data->mShardHashKey); } + if (!req) { + *keepItem = true; + return false; + } + return true; } void FlusherSLS::OnSendDone(const HttpResponse& response, SenderQueueItem* item) { @@ -674,6 +673,7 @@ void FlusherSLS::OnSendDone(const HttpResponse& response, SenderQueueItem* item) bool isProfileData = GetProfileSender()->IsProfileData(mRegion, mProject, data->mLogstore); int32_t curTime = time(NULL); auto curSystemTime = chrono::system_clock::now(); + bool hasAuthError = false; if (slsResponse.mStatusCode == 200) { auto& cpt = data->mExactlyOnceCheckpoint; if (cpt) { @@ -769,28 +769,10 @@ void FlusherSLS::OnSendDone(const HttpResponse& response, SenderQueueItem* item) operation = OperationOnFail::RETRY_LATER; } else if (sendResult == SEND_UNAUTHORIZED) { failDetail << "write unauthorized"; - suggestion << "check https connection to endpoint or access keys provided"; - if (data->mTryCnt > static_cast(INT32_FLAG(unauthorized_send_retrytimes))) { - operation = OperationOnFail::DISCARD; - } else { - BOOL_FLAG(global_network_success) = true; -#ifdef __ENTERPRISE__ - if (mAliuid.empty() && !EnterpriseConfigProvider::GetInstance()->IsPubRegion()) { - operation = OperationOnFail::RETRY_IMMEDIATELY; - } else { -#endif - int32_t lastUpdateTime; - sdk::Client* sendClient = SLSClientManager::GetInstance()->GetClient(mRegion, mAliuid); - if (SLSControl::GetInstance()->SetSlsSendClientAuth(mAliuid, false, sendClient, lastUpdateTime)) - operation = OperationOnFail::RETRY_IMMEDIATELY; - else if (curTime - lastUpdateTime < INT32_FLAG(unauthorized_allowed_delay_after_reset)) - operation = OperationOnFail::RETRY_LATER; - else - operation = OperationOnFail::DISCARD; -#ifdef __ENTERPRISE__ - } -#endif - } + suggestion << "check access keys provided"; + operation = OperationOnFail::RETRY_LATER; + BOOL_FLAG(global_network_success) = true; + hasAuthError = true; if (mUnauthErrorCnt) { mUnauthErrorCnt->Add(1); } @@ -913,6 +895,7 @@ void FlusherSLS::OnSendDone(const HttpResponse& response, SenderQueueItem* item) break; } } + SLSClientManager::GetInstance()->UpdateAccessKeyStatus(mAliuid, !hasAuthError); } bool FlusherSLS::Send(string&& data, const string& shardHashKey, const string& logstore) { diff --git a/core/plugin/flusher/sls/FlusherSLS.h b/core/plugin/flusher/sls/FlusherSLS.h index 42446b3813..c874a71f1e 100644 --- a/core/plugin/flusher/sls/FlusherSLS.h +++ b/core/plugin/flusher/sls/FlusherSLS.h @@ -65,7 +65,7 @@ class FlusherSLS : public HttpFlusher { bool Send(PipelineEventGroup&& g) override; bool Flush(size_t key) override; bool FlushAll() override; - std::unique_ptr BuildRequest(SenderQueueItem* item) const override; + bool BuildRequest(SenderQueueItem* item, std::unique_ptr& req, bool* keepItem) const override; void OnSendDone(const HttpResponse& response, SenderQueueItem* item) override; CompressType GetCompressType() const { return mCompressor ? mCompressor->GetCompressType() : CompressType::NONE; } diff --git a/core/plugin/flusher/sls/SLSClientManager.cpp b/core/plugin/flusher/sls/SLSClientManager.cpp index 697dedb45b..7e7930a547 100644 --- a/core/plugin/flusher/sls/SLSClientManager.cpp +++ b/core/plugin/flusher/sls/SLSClientManager.cpp @@ -14,18 +14,32 @@ #include "plugin/flusher/sls/SLSClientManager.h" +#ifdef __linux__ +#include +#endif + +#include + #include "app_config/AppConfig.h" #include "common/EndpointUtil.h" #include "common/Flags.h" #include "common/LogtailCommonFlags.h" #include "common/StringTools.h" #include "common/TimeUtil.h" +#include "common/version.h" #include "logger/Logger.h" #include "monitor/Monitor.h" +#ifdef __ENTERPRISE__ +#include "plugin/flusher/sls/EnterpriseSLSClientManager.h" +#endif #include "plugin/flusher/sls/FlusherSLS.h" #include "plugin/flusher/sls/SendResult.h" #include "sdk/Exception.h" -#include "sls_control/SLSControl.h" + +// for windows compatability, to avoid conflict with the same function defined in windows.h +#ifdef SetPort +#undef SetPort +#endif DEFINE_FLAG_STRING(data_endpoint_policy, "policy for switching between data server endpoints, possible options include " @@ -37,9 +51,9 @@ DEFINE_FLAG_INT32(test_network_normal_interval, "if last check is normal, test n DEFINE_FLAG_INT32(test_unavailable_endpoint_interval, "test unavailable endpoint interval", 60); DEFINE_FLAG_INT32(send_switch_real_ip_interval, "seconds", 60); DEFINE_FLAG_BOOL(send_prefer_real_ip, "use real ip to send data", false); - -DECLARE_FLAG_STRING(default_access_key_id); -DECLARE_FLAG_STRING(default_access_key); +DEFINE_FLAG_STRING(default_access_key_id, "", ""); +DEFINE_FLAG_STRING(default_access_key, "", ""); +DEFINE_FLAG_STRING(custom_user_agent, "custom user agent appended at the end of the exsiting ones", ""); using namespace std; @@ -115,26 +129,30 @@ std::string SLSClientManager::RegionEndpointsInfo::GetAvailableEndpointWithTopPr return mDefaultEndpoint; } +SLSClientManager* SLSClientManager::GetInstance() { +#ifdef __ENTERPRISE__ + static auto ptr = unique_ptr(new EnterpriseSLSClientManager()); +#else + static auto ptr = unique_ptr(new SLSClientManager()); +#endif + return ptr.get(); +} + void SLSClientManager::Init() { InitEndpointSwitchPolicy(); + GenerateUserAgent(); if (mDataServerSwitchPolicy == EndpointSwitchPolicy::DESIGNATED_FIRST) { mProbeNetworkClient.reset(new sdk::Client("", - STRING_FLAG(default_access_key_id), - STRING_FLAG(default_access_key), - INT32_FLAG(sls_client_send_timeout), - LoongCollectorMonitor::mIpAddr, - AppConfig::GetInstance()->GetBindInterface())); - SLSControl::GetInstance()->SetSlsSendClientCommonParam(mProbeNetworkClient.get()); + "", + INT32_FLAG(sls_client_send_timeout))); + mProbeNetworkClient->SetPort(AppConfig::GetInstance()->GetDataServerPort()); mProbeNetworkThreadRes = async(launch::async, &SLSClientManager::ProbeNetworkThread, this); } if (BOOL_FLAG(send_prefer_real_ip)) { mUpdateRealIpClient.reset(new sdk::Client("", - STRING_FLAG(default_access_key_id), - STRING_FLAG(default_access_key), - INT32_FLAG(sls_client_send_timeout), - LoongCollectorMonitor::mIpAddr, - AppConfig::GetInstance()->GetBindInterface())); - SLSControl::GetInstance()->SetSlsSendClientCommonParam(mUpdateRealIpClient.get()); + "", + INT32_FLAG(sls_client_send_timeout))); + mUpdateRealIpClient->SetPort(AppConfig::GetInstance()->GetDataServerPort()); mUpdateRealIpThreadRes = async(launch::async, &SLSClientManager::UpdateRealIpThread, this); } } @@ -228,25 +246,19 @@ sdk::Client* SLSClientManager::GetClient(const string& region, const string& ali } string endpoint = GetAvailableEndpointWithTopPriority(region); - unique_ptr client = make_unique(endpoint, - "", - "", - INT32_FLAG(sls_client_send_timeout), - LoongCollectorMonitor::mIpAddr, - AppConfig::GetInstance()->GetBindInterface()); - SLSControl::GetInstance()->SetSlsSendClientCommonParam(client.get()); + auto client = make_unique(aliuid, + endpoint, + INT32_FLAG(sls_client_send_timeout)); ResetClientPort(region, client.get()); LOG_INFO(sLogger, ("init endpoint for sender, region", region)("uid", aliuid)("hostname", GetHostFromEndpoint(endpoint))( "use https", ToString(client->IsUsingHTTPS()))); - int32_t lastUpdateTime; - SLSControl::GetInstance()->SetSlsSendClientAuth(aliuid, true, client.get(), lastUpdateTime); - sdk::Client* res = client.get(); + auto ptr = client.get(); { lock_guard lock(mClientMapMux); mClientMap.insert(make_pair(key, make_pair(std::move(client), time(nullptr)))); } - return res; + return ptr; } bool SLSClientManager::ResetClientEndpoint(const string& aliuid, const string& region, time_t curTime) { @@ -276,6 +288,7 @@ bool SLSClientManager::ResetClientEndpoint(const string& aliuid, const string& r } void SLSClientManager::ResetClientPort(const string& region, sdk::Client* sendClient) { + sendClient->SetPort(AppConfig::GetInstance()->GetDataServerPort()); if (AppConfig::GetInstance()->GetDataServerPort() == 80) { lock_guard lock(mRegionEndpointEntryMapLock); auto iter = mRegionEndpointEntryMap.find(region); @@ -306,6 +319,16 @@ void SLSClientManager::CleanTimeoutClient() { } } +bool SLSClientManager::GetAccessKey(const std::string& aliuid, + AuthType& type, + std::string& accessKeyId, + std::string& accessKeySecret) { + accessKeyId = STRING_FLAG(default_access_key_id); + accessKeySecret = STRING_FLAG(default_access_key); + type = AuthType::AK; + return true; +} + void SLSClientManager::AddEndpointEntry(const string& region, const string& endpoint, bool isProxy, @@ -647,5 +670,99 @@ void SLSClientManager::SetRealIp(const string& region, const string& ip) { pInfo->SetRealIp(ip); } +void SLSClientManager::GenerateUserAgent() { + string os; +#if defined(__linux__) + utsname* buf = new utsname; + if (-1 == uname(buf)) { + LOG_WARNING( + sLogger, + ("get os info part of user agent failed", errno)("use default os info", LoongCollectorMonitor::mOsDetail)); + os = LoongCollectorMonitor::mOsDetail; + } else { + char* pch = strchr(buf->release, '-'); + if (pch) { + *pch = '\0'; + } + os.append(buf->sysname); + os.append("; "); + os.append(buf->release); + os.append("; "); + os.append(buf->machine); + } + delete buf; +#elif defined(_MSC_VER) + os = LoongCollectorMonitor::mOsDetail; +#endif + + mUserAgent = string("ilogtail/") + ILOGTAIL_VERSION + " (" + os + ") ip/" + LoongCollectorMonitor::mIpAddr + " env/" + + GetRunningEnvironment(); + if (!STRING_FLAG(custom_user_agent).empty()) { + mUserAgent += " " + STRING_FLAG(custom_user_agent); + } + LOG_INFO(sLogger, ("user agent", mUserAgent)); +} + +string SLSClientManager::GetRunningEnvironment() { + string env; + if (getenv("ALIYUN_LOG_STATIC_CONTAINER_INFO")) { + env = "ECI"; + } else if (getenv("ACK_NODE_LOCAL_DNS_ADMISSION_CONTROLLER_SERVICE_HOST")) { + // logtail-ds installed by ACK will possess the above env + env = "ACK-Daemonset"; + } else if (getenv("KUBERNETES_SERVICE_HOST")) { + // containers in K8S will possess the above env + if (AppConfig::GetInstance()->IsPurageContainerMode()) { + env = "K8S-Daemonset"; + } else if (TryCurlEndpoint("http://100.100.100.200/latest/meta-data")) { + // containers in ACK can be connected to the above address, see + // https://help.aliyun.com/document_detail/108460.html#section-akf-lwh-1gb. + // Note: we can not distinguish ACK from K8S built on ECS + env = "ACK-Sidecar"; + } else { + env = "K8S-Sidecar"; + } + } else if (AppConfig::GetInstance()->IsPurageContainerMode() || getenv("ALIYUN_LOGTAIL_CONFIG")) { + env = "Docker"; + } else if (TryCurlEndpoint("http://100.100.100.200/latest/meta-data")) { + env = "ECS"; + } else { + env = "Others"; + } + return env; +} + +bool SLSClientManager::TryCurlEndpoint(const string& endpoint) { + CURL* curl; + for (size_t retryTimes = 1; retryTimes <= 5; retryTimes++) { + curl = curl_easy_init(); + if (curl) { + break; + } + this_thread::sleep_for(chrono::seconds(1)); + } + + if (curl) { + curl_easy_setopt(curl, CURLOPT_URL, endpoint.c_str()); + curl_easy_setopt(curl, CURLOPT_NOBODY, 1L); + curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L); + curl_easy_setopt(curl, CURLOPT_NOSIGNAL, 1); + curl_easy_setopt(curl, CURLOPT_TIMEOUT, 3); + curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 0L); + curl_easy_setopt(curl, CURLOPT_SSL_VERIFYHOST, 0L); + + if (curl_easy_perform(curl) != CURLE_OK) { + curl_easy_cleanup(curl); + return false; + } + curl_easy_cleanup(curl); + return true; + } + + LOG_WARNING( + sLogger, + ("curl handler cannot be initialized during user environment identification", "user agent may be mislabeled")); + return false; +} } // namespace logtail diff --git a/core/plugin/flusher/sls/SLSClientManager.h b/core/plugin/flusher/sls/SLSClientManager.h index 32961ebaf9..c82a8a8cb1 100644 --- a/core/plugin/flusher/sls/SLSClientManager.h +++ b/core/plugin/flusher/sls/SLSClientManager.h @@ -21,8 +21,10 @@ #include #include #include +#include #include #include +#include #include "sdk/Client.h" @@ -32,19 +34,19 @@ class SLSClientManager { public: enum class EndpointSourceType { LOCAL, REMOTE }; enum class EndpointSwitchPolicy { DESIGNATED_FIRST, DESIGNATED_LOCKED }; + enum class AuthType { ANONYMOUS, AK }; + virtual ~SLSClientManager() = default; SLSClientManager(const SLSClientManager&) = delete; SLSClientManager& operator=(const SLSClientManager&) = delete; - static SLSClientManager* GetInstance() { - static SLSClientManager instance; - return &instance; - } + static SLSClientManager* GetInstance(); void Init(); void Stop(); EndpointSwitchPolicy GetServerSwitchPolicy() const { return mDataServerSwitchPolicy; } + const std::string& GetUserAgent() const { return mUserAgent; } void IncreaseAliuidReferenceCntForRegion(const std::string& region, const std::string& aliuid); void DecreaseAliuidReferenceCntForRegion(const std::string& region, const std::string& aliuid); @@ -52,6 +54,9 @@ class SLSClientManager { sdk::Client* GetClient(const std::string& region, const std::string& aliuid, bool createIfNotFound = true); bool ResetClientEndpoint(const std::string& aliuid, const std::string& region, time_t curTime); void CleanTimeoutClient(); + virtual bool + GetAccessKey(const std::string& aliuid, AuthType& type, std::string& accessKeyId, std::string& accessKeySecret); + virtual void UpdateAccessKeyStatus(const std::string& aliuid, bool success) {} void AddEndpointEntry(const std::string& region, const std::string& endpoint, @@ -68,6 +73,14 @@ class SLSClientManager { std::string GetRegionFromEndpoint(const std::string& endpoint); // for backward compatibility bool HasNetworkAvailable(); // TODO: remove this function +protected: + SLSClientManager() = default; + + virtual std::string GetRunningEnvironment(); + bool TryCurlEndpoint(const std::string& endpoint); + + std::string mUserAgent; + private: enum class EndpointStatus { STATUS_OK_WITH_IP, STATUS_OK_WITH_ENDPOINT, STATUS_ERROR }; @@ -111,9 +124,7 @@ class SLSClientManager { } }; - SLSClientManager() = default; - ~SLSClientManager() = default; - + virtual void GenerateUserAgent(); void InitEndpointSwitchPolicy(); std::vector GetRegionAliuids(const std::string& region); diff --git a/core/runner/FlusherRunner.cpp b/core/runner/FlusherRunner.cpp index 42f3c604b7..38b09d78de 100644 --- a/core/runner/FlusherRunner.cpp +++ b/core/runner/FlusherRunner.cpp @@ -34,6 +34,8 @@ DEFINE_FLAG_INT32(flusher_runner_exit_timeout_secs, "", 60); DEFINE_FLAG_INT32(check_send_client_timeout_interval, "", 600); +DECLARE_FLAG_INT32(discard_send_fail_interval); + using namespace std; namespace logtail { @@ -117,7 +119,25 @@ void FlusherRunner::PushToHttpSink(SenderQueueItem* item, bool withLimit) { this_thread::sleep_for(chrono::milliseconds(10)); } - auto req = static_cast(item->mFlusher)->BuildRequest(item); + unique_ptr req; + bool keepItem = false; + if (!static_cast(item->mFlusher)->BuildRequest(item, req, &keepItem)) { + if (keepItem + && chrono::duration_cast(chrono::system_clock::now() - item->mFirstEnqueTime).count() + < INT32_FLAG(discard_send_fail_interval)) { + item->mStatus = SendingStatus::IDLE; + LOG_DEBUG(sLogger, + ("failed to build request", "retry later")("item address", item)( + "config-flusher-dst", QueueKeyManager::GetInstance()->GetName(item->mQueueKey))); + } else { + LOG_WARNING(sLogger, + ("failed to build request", "discard item")("item address", item)( + "config-flusher-dst", QueueKeyManager::GetInstance()->GetName(item->mQueueKey))); + SenderQueueManager::GetInstance()->RemoveItem(item->mQueueKey, item); + } + return; + } + req->mEnqueTime = item->mLastSendTime = chrono::system_clock::now(); HttpSink::GetInstance()->AddRequest(std::move(req)); ++mHttpSendingCnt; diff --git a/core/runner/sink/http/HttpSink.cpp b/core/runner/sink/http/HttpSink.cpp index b7ef0beac7..2bd2e77cb9 100644 --- a/core/runner/sink/http/HttpSink.cpp +++ b/core/runner/sink/http/HttpSink.cpp @@ -245,17 +245,17 @@ void HttpSink::HandleCompletedRequests(int& runningHandlers) { long statusCode = 0; curl_easy_getinfo(handler, CURLINFO_RESPONSE_CODE, &statusCode); request->mResponse.SetStatusCode(statusCode); - static_cast(request->mItem->mFlusher)->OnSendDone(request->mResponse, request->mItem); - FlusherRunner::GetInstance()->DecreaseHttpSendingCnt(); - mOutSuccessfulItemsTotal->Add(1); - mSuccessfulItemTotalResponseTimeMs->Add(responseTime); - mSendingItemsTotal->Sub(1); LOG_DEBUG( sLogger, ("send http request succeeded, item address", request->mItem)( "config-flusher-dst", QueueKeyManager::GetInstance()->GetName(request->mItem->mQueueKey))( "response time", ToString(responseTimeMs) + "ms")("try cnt", ToString(request->mTryCnt))( "sending cnt", ToString(FlusherRunner::GetInstance()->GetSendingBufferCount()))); + static_cast(request->mItem->mFlusher)->OnSendDone(request->mResponse, request->mItem); + FlusherRunner::GetInstance()->DecreaseHttpSendingCnt(); + mOutSuccessfulItemsTotal->Add(1); + mSuccessfulItemTotalResponseTimeMs->Add(responseTime); + mSendingItemsTotal->Sub(1); break; } default: @@ -277,9 +277,6 @@ void HttpSink::HandleCompletedRequests(int& runningHandlers) { ++runningHandlers; requestReused = true; } else { - static_cast(request->mItem->mFlusher) - ->OnSendDone(request->mResponse, request->mItem); - FlusherRunner::GetInstance()->DecreaseHttpSendingCnt(); LOG_DEBUG(sLogger, ("failed to send http request", "abort")("item address", request->mItem)( "config-flusher-dst", @@ -287,6 +284,9 @@ void HttpSink::HandleCompletedRequests(int& runningHandlers) { "response time", ToString(responseTimeMs) + "ms")("try cnt", ToString(request->mTryCnt))( "sending cnt", ToString(FlusherRunner::GetInstance()->GetSendingBufferCount()))); + static_cast(request->mItem->mFlusher) + ->OnSendDone(request->mResponse, request->mItem); + FlusherRunner::GetInstance()->DecreaseHttpSendingCnt(); } mOutFailedItemsTotal->Add(1); mFailedItemTotalResponseTimeMs->Add(responseTime); diff --git a/core/sdk/Client.cpp b/core/sdk/Client.cpp index bc1cc16465..c99d4dd71f 100644 --- a/core/sdk/Client.cpp +++ b/core/sdk/Client.cpp @@ -19,74 +19,21 @@ #include "Exception.h" #include "Result.h" #include "logger/Logger.h" +#include "plugin/flusher/sls/SLSClientManager.h" +#include "app_config/AppConfig.h" +#include "monitor/Monitor.h" namespace logtail { namespace sdk { using namespace std; -#define LOG_SDK_IDENTIFICATION "ali-log-logtail" - - - static string GetHostIp(const std::string& intf) { - // @todo - return "127.0.0.1"; - } - - Client::Client(const string& slsHost, - const string& accessKeyId, - const string& accessKey, - int32_t timeout, - const string& source, - const string& intf) - : mAccessKeyId(accessKeyId), - mAccessKey(accessKey), - mSource(source), - mTimeout(timeout), - mUserAgent(LOG_SDK_IDENTIFICATION), - mKeyProvider(""), - mHostFieldSuffix(""), - mIsHostRawIp(false), - mPort(80), - mUsingHTTPS(false) { - mClient = new CurlClient(); - mInterface = intf; - mSlsHostUpdateTime = 0; - mSlsRealIpUpdateTime = 0; - SetSlsHost(slsHost); - if (mSource.empty()) { - mSource = GetHostIp(mInterface); - } - if (mTimeout <= 0) { - mTimeout = LOG_REQUEST_TIMEOUT; - } - } - Client::Client(const string& slsHost, - const string& accessKeyId, - const string& accessKey, - const std::string& securityToken, - int32_t timeout, - const string& source, - const string& intf) - : mAccessKeyId(accessKeyId), - mAccessKey(accessKey), - mSecurityToken(securityToken), - mSource(source), - mTimeout(timeout), - mUserAgent(LOG_SDK_IDENTIFICATION), - mKeyProvider(""), - mHostFieldSuffix(""), - mIsHostRawIp(false), - mPort(80), - mUsingHTTPS(false) { + Client::Client(const string& aliuid, const string& slsHost, int32_t timeout) + : mTimeout(timeout), mHostFieldSuffix(""), mIsHostRawIp(false), mPort(80), mUsingHTTPS(false), mAliuid(aliuid) { mClient = new CurlClient(); - mInterface = intf; mSlsHostUpdateTime = 0; mSlsRealIpUpdateTime = 0; SetSlsHost(slsHost); - if (mSource.empty()) { - mSource = GetHostIp(mInterface); - } if (mTimeout <= 0) { mTimeout = LOG_REQUEST_TIMEOUT; } @@ -103,33 +50,6 @@ namespace sdk { mUsingHTTPS = (443 == mPort); } - - void Client::SetAccessKey(const string& accessKey) { - mSpinLock.lock(); - mAccessKey = accessKey; - mSpinLock.unlock(); - } - - string Client::GetAccessKey() { - mSpinLock.lock(); - string accessKey = mAccessKey; - mSpinLock.unlock(); - return accessKey; - } - - void Client::SetAccessKeyId(const string& accessKeyId) { - mSpinLock.lock(); - mAccessKeyId = accessKeyId; - mSpinLock.unlock(); - } - - string Client::GetAccessKeyId() { - mSpinLock.lock(); - string accessKeyId = mAccessKeyId; - mSpinLock.unlock(); - return accessKeyId; - } - string Client::GetSlsHost() { mSpinLock.lock(); string slsHost = mSlsHost; @@ -224,9 +144,6 @@ namespace sdk { bool isTimeSeries) { map httpHeader; httpHeader[CONTENT_TYPE] = TYPE_LOG_PROTOBUF; - if (!mKeyProvider.empty()) { - httpHeader[X_LOG_KEYPROVIDER] = mKeyProvider; - } httpHeader[X_LOG_BODYRAWSIZE] = std::to_string(rawSize); httpHeader[X_LOG_COMPRESSTYPE] = Client::GetCompressTypeString(compressType); if (isTimeSeries) { @@ -243,9 +160,6 @@ namespace sdk { const std::string& hashKey) { map httpHeader; httpHeader[CONTENT_TYPE] = TYPE_LOG_PROTOBUF; - if (!mKeyProvider.empty()) { - httpHeader[X_LOG_KEYPROVIDER] = mKeyProvider; - } httpHeader[X_LOG_MODE] = LOG_MODE_BATCH_GROUP; httpHeader[X_LOG_BODYRAWSIZE] = std::to_string(packageListData.size()); httpHeader[X_LOG_COMPRESSTYPE] = Client::GetCompressTypeString(compressType); @@ -263,14 +177,10 @@ namespace sdk { bool isTimeSeries) { map httpHeader; httpHeader[CONTENT_TYPE] = TYPE_LOG_PROTOBUF; - if (!mKeyProvider.empty()) { - httpHeader[X_LOG_KEYPROVIDER] = mKeyProvider; - } httpHeader[X_LOG_BODYRAWSIZE] = std::to_string(rawSize); httpHeader[X_LOG_COMPRESSTYPE] = Client::GetCompressTypeString(compressType); if (isTimeSeries) { - return CreateAsynPostMetricStoreLogsRequest( - project, logstore, compressedLogGroup, httpHeader,item); + return CreateAsynPostMetricStoreLogsRequest(project, logstore, compressedLogGroup, httpHeader, item); } else { return CreateAsynPostLogStoreLogsRequest( project, logstore, compressedLogGroup, httpHeader, hashKey, hashKeySeqID, item); @@ -286,9 +196,6 @@ namespace sdk { const std::string& hashKey) { map httpHeader; httpHeader[CONTENT_TYPE] = TYPE_LOG_PROTOBUF; - if (!mKeyProvider.empty()) { - httpHeader[X_LOG_KEYPROVIDER] = mKeyProvider; - } httpHeader[X_LOG_MODE] = LOG_MODE_BATCH_GROUP; httpHeader[X_LOG_BODYRAWSIZE] = std::to_string(packageListData.size()); httpHeader[X_LOG_COMPRESSTYPE] = Client::GetCompressTypeString(compressType); @@ -304,10 +211,19 @@ namespace sdk { std::map& header, HttpMessage& httpMessage, std::string* realIpPtr) { + SLSClientManager::AuthType type; + string accessKeyId, accessKeySecret; + if (!SLSClientManager::GetInstance()->GetAccessKey(mAliuid, type, accessKeyId, accessKeySecret)) { + throw LOGException(LOGE_UNAUTHORIZED, ""); + } + if (type == SLSClientManager::AuthType::ANONYMOUS) { + header[X_LOG_KEYPROVIDER] = MD5_SHA1_SALT_KEYPROVIDER; + } + string host = GetHost(project); SetCommonHeader(header, (int32_t)(body.length()), project); - string signature = GetUrlSignature(httpMethod, url, header, parameterList, body, GetAccessKey()); - header[AUTHORIZATION] = LOG_HEADSIGNATURE_PREFIX + GetAccessKeyId() + ':' + signature; + string signature = GetUrlSignature(httpMethod, url, header, parameterList, body, accessKeySecret); + header[AUTHORIZATION] = LOG_HEADSIGNATURE_PREFIX + accessKeyId + ':' + signature; string queryString; GetQueryString(parameterList, queryString); @@ -317,7 +233,7 @@ namespace sdk { port = 443; } mClient->Send( - httpMethod, host, port, url, queryString, header, body, mTimeout, httpMessage, mInterface, mUsingHTTPS); + httpMethod, host, port, url, queryString, header, body, mTimeout, httpMessage, AppConfig::GetInstance()->GetBindInterface(), mUsingHTTPS); if (httpMessage.statusCode != 200) { if (realIpPtr != NULL) { @@ -333,16 +249,26 @@ namespace sdk { const std::string& body, std::map& httpHeader, SenderQueueItem* item) { + SLSClientManager::AuthType type; + string accessKeyId, accessKeySecret; + if (!SLSClientManager::GetInstance()->GetAccessKey(mAliuid, type, accessKeyId, accessKeySecret)) { + return nullptr; + } + if (type == SLSClientManager::AuthType::ANONYMOUS) { + httpHeader[X_LOG_KEYPROVIDER] = MD5_SHA1_SALT_KEYPROVIDER; + } + string operation = METRICSTORES; operation.append("/").append(project).append("/").append(logstore).append("/api/v1/write"); httpHeader[CONTENT_MD5] = CalcMD5(body); map parameterList; string host = GetSlsHost(); SetCommonHeader(httpHeader, (int32_t)(body.length()), ""); - string signature = GetUrlSignature(HTTP_POST, operation, httpHeader, parameterList, body, GetAccessKey()); - httpHeader[AUTHORIZATION] = LOG_HEADSIGNATURE_PREFIX + GetAccessKeyId() + ':' + signature; + string signature = GetUrlSignature(HTTP_POST, operation, httpHeader, parameterList, body, accessKeySecret); + httpHeader[AUTHORIZATION] = LOG_HEADSIGNATURE_PREFIX + accessKeyId + ':' + signature; return make_unique(HTTP_POST, mUsingHTTPS, host, mPort, operation, "", httpHeader, body, item); } + unique_ptr Client::CreateAsynPostLogStoreLogsRequest(const std::string& project, const std::string& logstore, @@ -351,6 +277,15 @@ namespace sdk { const std::string& hashKey, int64_t hashKeySeqID, SenderQueueItem* item) { + SLSClientManager::AuthType type; + string accessKeyId, accessKeySecret; + if (!SLSClientManager::GetInstance()->GetAccessKey(mAliuid, type, accessKeyId, accessKeySecret)) { + return nullptr; + } + if (type == SLSClientManager::AuthType::ANONYMOUS) { + httpHeader[X_LOG_KEYPROVIDER] = MD5_SHA1_SALT_KEYPROVIDER; + } + string operation = LOGSTORES; operation.append("/").append(logstore); if (hashKey.empty()) @@ -370,8 +305,8 @@ namespace sdk { string host = GetHost(project); SetCommonHeader(httpHeader, (int32_t)(body.length()), project); - string signature = GetUrlSignature(HTTP_POST, operation, httpHeader, parameterList, body, GetAccessKey()); - httpHeader[AUTHORIZATION] = LOG_HEADSIGNATURE_PREFIX + GetAccessKeyId() + ':' + signature; + string signature = GetUrlSignature(HTTP_POST, operation, httpHeader, parameterList, body, accessKeySecret); + httpHeader[AUTHORIZATION] = LOG_HEADSIGNATURE_PREFIX + accessKeyId + ':' + signature; string queryString; GetQueryString(parameterList, queryString); @@ -383,14 +318,11 @@ namespace sdk { PostLogStoreLogsResponse Client::PingSLSServer(const std::string& project, const std::string& logstore, std::string* realIpPtr) { sls_logs::LogGroup logGroup; - logGroup.set_source(mSource); + logGroup.set_source(LoongCollectorMonitor::mIpAddr); auto serializeData = logGroup.SerializeAsString(); std::map httpHeader; httpHeader[CONTENT_TYPE] = TYPE_LOG_PROTOBUF; - if (!mKeyProvider.empty()) { - httpHeader[X_LOG_KEYPROVIDER] = mKeyProvider; - } httpHeader[X_LOG_BODYRAWSIZE] = std::to_string(serializeData.size()); return SynPostLogStoreLogs(project, logstore, serializeData, httpHeader, "", realIpPtr); } @@ -471,7 +403,7 @@ namespace sdk { compressedLog, mTimeout, httpResponse, - mInterface, + AppConfig::GetInstance()->GetBindInterface(), mUsingHTTPS); PostLogStoreLogsResponse ret; @@ -488,14 +420,11 @@ namespace sdk { httpHeader[HOST] = GetSlsHost(); } - httpHeader[USER_AGENT] = mUserAgent; + httpHeader[USER_AGENT] = SLSClientManager::GetInstance()->GetUserAgent(); httpHeader[X_LOG_APIVERSION] = LOG_API_VERSION; httpHeader[X_LOG_SIGNATUREMETHOD] = HMAC_SHA1; httpHeader[DATE] = GetDateString(); httpHeader[CONTENT_LENGTH] = std::to_string(contentLength); - if (!mSecurityToken.empty()) { - httpHeader[X_ACS_SECURITY_TOKEN] = mSecurityToken; - } } std::string Client::GetCompressTypeString(sls_logs::SlsCompressType compressType) { diff --git a/core/sdk/Client.h b/core/sdk/Client.h index 54a6136c07..073f39b657 100644 --- a/core/sdk/Client.h +++ b/core/sdk/Client.h @@ -20,8 +20,8 @@ #include "Common.h" #include "CurlImp.h" -#include "runner/sink/http/HttpSinkRequest.h" #include "protobuf/sls/sls_logs.pb.h" +#include "runner/sink/http/HttpSinkRequest.h" namespace logtail { namespace sdk { @@ -30,27 +30,11 @@ namespace sdk { public: /** Constructor needs at least three parameters. * @param LOGHost LOG service address, for example:http://cn-hangzhou.log.aliyuncs.com. - * @param accessKeyId Aliyun AccessKeyId. - * @param accessKey Aliyun AccessKey Secret. * @param timeout Timeout time of one operation. - * @param source Source identifier used to differentiate data from different machines. If it is empty, - * constructor will use machine ip as its source. - * @param compressFlag The flag decides whether compresses the data or not when put data to LOG. - * @return The objcect pointer. */ - Client(const std::string& slsHost, - const std::string& accessKeyId, - const std::string& accessKey, - int32_t timeout = LOG_REQUEST_TIMEOUT, - const std::string& source = "", - const std::string& intf = ""); - Client(const std::string& slsHost, - const std::string& accessKeyId, - const std::string& accessKey, - const std::string& securityToken, - int32_t timeout = LOG_REQUEST_TIMEOUT, - const std::string& source = "", - const std::string& intf = ""); + Client(const std::string& aliuid, + const std::string& slsHost, + int32_t timeout = LOG_REQUEST_TIMEOUT); ~Client() throw(); void SetPort(int32_t port); @@ -60,25 +44,12 @@ namespace sdk { std::string GetHost(const std::string& project); - void SetUserAgent(const std::string& userAgent) { mUserAgent = userAgent; } - void SetKeyProvider(const std::string& keyProvider) { mKeyProvider = keyProvider; } - - void SetAccessKey(const std::string& accessKey); - std::string GetAccessKey(); - void SetAccessKeyId(const std::string& accessKeyId); - std::string GetAccessKeyId(); void SetSlsHost(const std::string& slsHost); std::string GetSlsHost(); std::string GetRawSlsHost(); std::string GetHostFieldSuffix(); bool GetRawSlsHostFlag(); - // @note not used - const std::string& GetSecurityToken() { return mSecurityToken; } - // @note not used - void SetSecurityToken(const std::string& securityToken) { mSecurityToken = securityToken; } - // @note not used - void RemoveSecurityToken() { SetSecurityToken(""); } void SetSlsHostUpdateTime(int32_t uptime) { mSlsHostUpdateTime = uptime; } int32_t GetSlsHostUpdateTime() { return mSlsHostUpdateTime; } @@ -235,18 +206,12 @@ namespace sdk { int32_t mSlsRealIpUpdateTime; std::string mRawSlsHost; std::string mSlsHost; - std::string mAccessKeyId; - std::string mAccessKey; - std::string mSecurityToken; - std::string mSource; int32_t mTimeout; - std::string mUserAgent; - std::string mKeyProvider; std::string mHostFieldSuffix; bool mIsHostRawIp; - std::string mInterface; int32_t mPort; bool mUsingHTTPS; + std::string mAliuid; SpinLock mSpinLock; diff --git a/core/sls_control/SLSControl.cpp b/core/sls_control/SLSControl.cpp deleted file mode 100644 index edc43ee76c..0000000000 --- a/core/sls_control/SLSControl.cpp +++ /dev/null @@ -1,168 +0,0 @@ -// Copyright 2022 iLogtail Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "sls_control/SLSControl.h" - -#include - -#ifdef __linux__ -#include -#endif - -#include "app_config/AppConfig.h" -#include "common/Flags.h" -#include "common/version.h" -#include "curl/curl.h" -#include "logger/Logger.h" -#include "monitor/Monitor.h" -#ifdef __ENTERPRISE__ -#include "sls_control/EnterpriseSLSControl.h" -#endif - -// for windows compatability, to avoid conflict with the same function defined in windows.h -#ifdef SetPort -#undef SetPort -#endif - -DEFINE_FLAG_STRING(default_access_key_id, "", ""); -DEFINE_FLAG_STRING(default_access_key, "", ""); -DEFINE_FLAG_STRING(custom_user_agent, "custom user agent appended at the end of the exsiting ones", ""); - -using namespace std; - -namespace logtail { -SLSControl* SLSControl::GetInstance() { -#ifdef __ENTERPRISE__ - static SLSControl* ptr = new EnterpriseSLSControl(); -#else - static SLSControl* ptr = new SLSControl(); -#endif - return ptr; -} - -void SLSControl::Init() { - GenerateUserAgent(); -} - -void SLSControl::SetSlsSendClientCommonParam(sdk::Client* sendClient) { - sendClient->SetUserAgent(mUserAgent); - sendClient->SetPort(AppConfig::GetInstance()->GetDataServerPort()); -} - -bool SLSControl::SetSlsSendClientAuth(const string aliuid, - const bool init, - sdk::Client* sendClient, - int32_t& lastUpdateTime) { - sendClient->SetAccessKeyId(STRING_FLAG(default_access_key_id)); - sendClient->SetAccessKey(STRING_FLAG(default_access_key)); - LOG_INFO(sLogger, ("SetAccessKeyId", STRING_FLAG(default_access_key_id))); - return true; -} - -void SLSControl::GenerateUserAgent() { - string os; -#if defined(__linux__) - utsname* buf = new utsname; - if (-1 == uname(buf)) { - LOG_WARNING( - sLogger, - ("get os info part of user agent failed", errno)("use default os info", LoongCollectorMonitor::mOsDetail)); - os = LoongCollectorMonitor::mOsDetail; - } else { - char* pch = strchr(buf->release, '-'); - if (pch) { - *pch = '\0'; - } - os.append(buf->sysname); - os.append("; "); - os.append(buf->release); - os.append("; "); - os.append(buf->machine); - } - delete buf; -#elif defined(_MSC_VER) - os = LoongCollectorMonitor::mOsDetail; -#endif - - mUserAgent = string("ilogtail/") + ILOGTAIL_VERSION + " (" + os + ") ip/" + LoongCollectorMonitor::mIpAddr + " env/" - + GetRunningEnvironment(); - if (!STRING_FLAG(custom_user_agent).empty()) { - mUserAgent += " " + STRING_FLAG(custom_user_agent); - } - LOG_INFO(sLogger, ("user agent", mUserAgent)); -} - -string SLSControl::GetRunningEnvironment() { - string env; - if (getenv("ALIYUN_LOG_STATIC_CONTAINER_INFO")) { - env = "ECI"; - } else if (getenv("ACK_NODE_LOCAL_DNS_ADMISSION_CONTROLLER_SERVICE_HOST")) { - // logtail-ds installed by ACK will possess the above env - env = "ACK-Daemonset"; - } else if (getenv("KUBERNETES_SERVICE_HOST")) { - // containers in K8S will possess the above env - if (AppConfig::GetInstance()->IsPurageContainerMode()) { - env = "K8S-Daemonset"; - } else if (TryCurlEndpoint("http://100.100.100.200/latest/meta-data")) { - // containers in ACK can be connected to the above address, see - // https://help.aliyun.com/document_detail/108460.html#section-akf-lwh-1gb. - // Note: we can not distinguish ACK from K8S built on ECS - env = "ACK-Sidecar"; - } else { - env = "K8S-Sidecar"; - } - } else if (AppConfig::GetInstance()->IsPurageContainerMode() || getenv("ALIYUN_LOGTAIL_CONFIG")) { - env = "Docker"; - } else if (TryCurlEndpoint("http://100.100.100.200/latest/meta-data")) { - env = "ECS"; - } else { - env = "Others"; - } - return env; -} - -bool SLSControl::TryCurlEndpoint(const string& endpoint) { - CURL* curl; - for (size_t retryTimes = 1; retryTimes <= 5; retryTimes++) { - curl = curl_easy_init(); - if (curl) { - break; - } - this_thread::sleep_for(chrono::seconds(1)); - } - - if (curl) { - curl_easy_setopt(curl, CURLOPT_URL, endpoint.c_str()); - curl_easy_setopt(curl, CURLOPT_NOBODY, 1L); - curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L); - curl_easy_setopt(curl, CURLOPT_NOSIGNAL, 1); - curl_easy_setopt(curl, CURLOPT_TIMEOUT, 3); - curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 0L); - curl_easy_setopt(curl, CURLOPT_SSL_VERIFYHOST, 0L); - - if (curl_easy_perform(curl) != CURLE_OK) { - curl_easy_cleanup(curl); - return false; - } - curl_easy_cleanup(curl); - return true; - } - - LOG_WARNING( - sLogger, - ("curl handler cannot be initialized during user environment identification", "user agent may be mislabeled")); - return false; -} - -} // namespace logtail diff --git a/core/sls_control/SLSControl.h b/core/sls_control/SLSControl.h deleted file mode 100644 index b50612aba3..0000000000 --- a/core/sls_control/SLSControl.h +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Copyright 2022 iLogtail Authors - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include -#include - -#include "sdk/Client.h" - -namespace logtail { - -class SLSControl { -protected: - std::string mUserAgent; - - SLSControl() = default; - virtual ~SLSControl() = default; - - virtual void GenerateUserAgent(); - virtual std::string GetRunningEnvironment(); - bool TryCurlEndpoint(const std::string& endpoint); - -public: - SLSControl(const SLSControl&) = delete; - SLSControl& operator=(const SLSControl&) = delete; - - static SLSControl* GetInstance(); - - void Init(); - virtual void SetSlsSendClientCommonParam(sdk::Client* sendClient); - virtual bool - SetSlsSendClientAuth(const std::string aliuid, const bool init, sdk::Client* sendClient, int32_t& lastUpdateTime); -}; - -} // namespace logtail diff --git a/core/unittest/flusher/CMakeLists.txt b/core/unittest/flusher/CMakeLists.txt index 96500a74d7..6b912780b9 100644 --- a/core/unittest/flusher/CMakeLists.txt +++ b/core/unittest/flusher/CMakeLists.txt @@ -21,6 +21,14 @@ target_link_libraries(flusher_sls_unittest ${UT_BASE_TARGET}) add_executable(pack_id_manager_unittest PackIdManagerUnittest.cpp) target_link_libraries(pack_id_manager_unittest ${UT_BASE_TARGET}) +if (ENABLE_ENTERPRISE) + add_executable(enterprise_sls_client_manager_unittest EnterpriseSLSClientManagerUnittest.cpp) + target_link_libraries(enterprise_sls_client_manager_unittest ${UT_BASE_TARGET}) +endif () + include(GoogleTest) gtest_discover_tests(flusher_sls_unittest) gtest_discover_tests(pack_id_manager_unittest) +if (ENABLE_ENTERPRISE) + gtest_discover_tests(enterprise_sls_client_manager_unittest) +endif () diff --git a/core/unittest/pipeline/ConcurrencyLimiterUnittest.cpp b/core/unittest/pipeline/ConcurrencyLimiterUnittest.cpp index 6ddddbc8d3..e593e8db68 100644 --- a/core/unittest/pipeline/ConcurrencyLimiterUnittest.cpp +++ b/core/unittest/pipeline/ConcurrencyLimiterUnittest.cpp @@ -1,4 +1,4 @@ -// Copyright 2023 iLogtail Authors +// Copyright 2024 iLogtail Authors // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -12,12 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include -#include - -#include - -#include "common/JsonUtil.h" #include "pipeline/limiter/ConcurrencyLimiter.h" #include "unittest/Unittest.h" @@ -31,7 +25,7 @@ class ConcurrencyLimiterUnittest : public testing::Test { }; void ConcurrencyLimiterUnittest::TestLimiter() const { - shared_ptr sConcurrencyLimiter = make_shared(80); + shared_ptr sConcurrencyLimiter = make_shared("", 80); // comcurrency = 10, count = 0 APSARA_TEST_EQUAL(true, sConcurrencyLimiter->IsValidToPop()); sConcurrencyLimiter->PostPop(); @@ -92,7 +86,6 @@ void ConcurrencyLimiterUnittest::TestLimiter() const { APSARA_TEST_EQUAL(30U, sConcurrencyLimiter->GetCurrentInterval()); } - UNIT_TEST_CASE(ConcurrencyLimiterUnittest, TestLimiter) } // namespace logtail diff --git a/core/unittest/plugin/PluginMock.h b/core/unittest/plugin/PluginMock.h index dbc9072ed7..14c95c8ce3 100644 --- a/core/unittest/plugin/PluginMock.h +++ b/core/unittest/plugin/PluginMock.h @@ -128,9 +128,18 @@ class FlusherHttpMock : public HttpFlusher { return true; } bool FlushAll() override { return mIsValid; } - std::unique_ptr BuildRequest(SenderQueueItem* item) const override { - return std::make_unique( + bool BuildRequest(SenderQueueItem* item, std::unique_ptr& req, bool* keepItem) const override { + if (item->mData == "invalid_keep") { + *keepItem = true; + return false; + } + if (item->mData == "invalid_discard") { + *keepItem = false; + return false; + } + req = std::make_unique( "", false, "", 80, "", "", std::map(), "", nullptr); + return true; } void OnSendDone(const HttpResponse& response, SenderQueueItem* item) override {} diff --git a/core/unittest/queue/SenderQueueManagerUnittest.cpp b/core/unittest/queue/SenderQueueManagerUnittest.cpp index 49ca28b7c4..6dc4c2f092 100644 --- a/core/unittest/queue/SenderQueueManagerUnittest.cpp +++ b/core/unittest/queue/SenderQueueManagerUnittest.cpp @@ -39,7 +39,7 @@ class SenderQueueManagerUnittest : public testing::Test { protected: static void SetUpTestCase() { sManager = SenderQueueManager::GetInstance(); - sConcurrencyLimiter = make_shared(80); + sConcurrencyLimiter = make_shared("", 80); sManager->mDefaultQueueParam.mCapacity = 2; sManager->mDefaultQueueParam.mLowWatermark = 1; sManager->mDefaultQueueParam.mHighWatermark = 3; @@ -62,7 +62,7 @@ class SenderQueueManagerUnittest : public testing::Test { sManager->Clear(); ExactlyOnceQueueManager::GetInstance()->Clear(); QueueKeyManager::GetInstance()->Clear(); - sConcurrencyLimiter = make_shared(80); + sConcurrencyLimiter = make_shared("", 80); } private: @@ -106,7 +106,7 @@ void SenderQueueManagerUnittest::TestCreateQueue() { } { // resued queue - shared_ptr newLimiter = make_shared(80); + shared_ptr newLimiter = make_shared("", 80); uint32_t maxRate = 10U; APSARA_TEST_TRUE( sManager->CreateQueue(0, sFlusherId, sCtx, {{"region", newLimiter}}, maxRate)); diff --git a/core/unittest/queue/SenderQueueUnittest.cpp b/core/unittest/queue/SenderQueueUnittest.cpp index 78cfa287d6..301b3a87d2 100644 --- a/core/unittest/queue/SenderQueueUnittest.cpp +++ b/core/unittest/queue/SenderQueueUnittest.cpp @@ -29,7 +29,7 @@ class SenderQueueUnittest : public testing::Test { protected: static void SetUpTestCase() { - sConcurrencyLimiter = make_shared(80); + sConcurrencyLimiter = make_shared("", 80); sCtx.SetConfigName("test_config"); } @@ -42,7 +42,7 @@ class SenderQueueUnittest : public testing::Test { void TearDown() override { sFeedback.Clear(); - sConcurrencyLimiter = make_shared(80); + sConcurrencyLimiter = make_shared("", 80); } private: diff --git a/core/unittest/sdk/SDKCommonUnittest.cpp b/core/unittest/sdk/SDKCommonUnittest.cpp index af2f062062..315a3b450d 100644 --- a/core/unittest/sdk/SDKCommonUnittest.cpp +++ b/core/unittest/sdk/SDKCommonUnittest.cpp @@ -17,7 +17,7 @@ #include "sdk/Client.h" #include "sdk/Exception.h" #include "common/CompressTools.h" -#include "sls_control/SLSControl.h" +#include "plugin/flusher/sls/EnterpriseSLSClientManager.h" DECLARE_FLAG_STRING(default_access_key_id); DECLARE_FLAG_STRING(default_access_key); diff --git a/core/unittest/sender/FlusherRunnerUnittest.cpp b/core/unittest/sender/FlusherRunnerUnittest.cpp index 20a931cfb0..9bb1b6e1fa 100644 --- a/core/unittest/sender/FlusherRunnerUnittest.cpp +++ b/core/unittest/sender/FlusherRunnerUnittest.cpp @@ -19,6 +19,8 @@ #include "unittest/Unittest.h" #include "unittest/plugin/PluginMock.h" +DECLARE_FLAG_INT32(discard_send_fail_interval); + using namespace std; namespace logtail { @@ -26,6 +28,13 @@ namespace logtail { class FlusherRunnerUnittest : public ::testing::Test { public: void TestDispatch(); + void TestPushToHttpSink(); + +protected: + void TearDown() override { + SenderQueueManager::GetInstance()->Clear(); + HttpSink::GetInstance()->mQueue.Clear(); + } }; void FlusherRunnerUnittest::TestDispatch() { @@ -67,7 +76,68 @@ void FlusherRunnerUnittest::TestDispatch() { } } +void FlusherRunnerUnittest::TestPushToHttpSink() { + auto flusher = make_unique(); + Json::Value tmp; + PipelineContext ctx; + flusher->SetContext(ctx); + flusher->SetMetricsRecordRef("name", "1"); + flusher->Init(Json::Value(), tmp); + { + // keep item + auto item = make_unique("invalid_keep", 10, flusher.get(), flusher->GetQueueKey()); + auto realItem = item.get(); + flusher->PushToQueue(std::move(item)); + + vector items; + SenderQueueManager::GetInstance()->GetAvailableItems(items, -1); + APSARA_TEST_EQUAL(1U, items.size()); + APSARA_TEST_EQUAL(realItem, items[0]); + FlusherRunner::GetInstance()->Dispatch(items[0]); + + APSARA_TEST_FALSE(SenderQueueManager::GetInstance()->mQueues.at(flusher->GetQueueKey()).Empty()); + APSARA_TEST_EQUAL(SendingStatus::IDLE, realItem->mStatus); + APSARA_TEST_TRUE(HttpSink::GetInstance()->mQueue.Empty()); + SenderQueueManager::GetInstance()->RemoveItem(flusher->GetQueueKey(), realItem); + HttpSink::GetInstance()->mQueue.Clear(); + } + { + // keep item, but outdated + auto item = make_unique("invalid_keep", 10, flusher.get(), flusher->GetQueueKey()); + auto realItem = item.get(); + flusher->PushToQueue(std::move(item)); + + vector items; + SenderQueueManager::GetInstance()->GetAvailableItems(items, -1); + APSARA_TEST_EQUAL(1U, items.size()); + APSARA_TEST_EQUAL(realItem, items[0]); + INT32_FLAG(discard_send_fail_interval) = 0; + FlusherRunner::GetInstance()->Dispatch(items[0]); + APSARA_TEST_TRUE(SenderQueueManager::GetInstance()->mQueues.at(flusher->GetQueueKey()).Empty()); + APSARA_TEST_TRUE(HttpSink::GetInstance()->mQueue.Empty()); + INT32_FLAG(discard_send_fail_interval) = 6 * 3600; + HttpSink::GetInstance()->mQueue.Clear(); + } + { + // discard item + auto item = make_unique("invalid_discard", 10, flusher.get(), flusher->GetQueueKey()); + auto realItem = item.get(); + flusher->PushToQueue(std::move(item)); + + vector items; + SenderQueueManager::GetInstance()->GetAvailableItems(items, -1); + APSARA_TEST_EQUAL(1U, items.size()); + APSARA_TEST_EQUAL(realItem, items[0]); + FlusherRunner::GetInstance()->Dispatch(items[0]); + + APSARA_TEST_TRUE(SenderQueueManager::GetInstance()->mQueues.at(flusher->GetQueueKey()).Empty()); + APSARA_TEST_TRUE(HttpSink::GetInstance()->mQueue.Empty()); + HttpSink::GetInstance()->mQueue.Clear(); + } +} + UNIT_TEST_CASE(FlusherRunnerUnittest, TestDispatch) +UNIT_TEST_CASE(FlusherRunnerUnittest, TestPushToHttpSink) } // namespace logtail From d24d082c87761d8db3f2ecdbc16de87d963a1688 Mon Sep 17 00:00:00 2001 From: Bingchang Chen Date: Tue, 3 Dec 2024 11:24:40 +0800 Subject: [PATCH 3/6] fix: force read ut memory leak (#1941) --- core/unittest/reader/CMakeLists.txt | 6 +++--- core/unittest/reader/ForceReadUnittest.cpp | 18 +++++++++++++----- 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/core/unittest/reader/CMakeLists.txt b/core/unittest/reader/CMakeLists.txt index 80df50da23..f870f9adfd 100644 --- a/core/unittest/reader/CMakeLists.txt +++ b/core/unittest/reader/CMakeLists.txt @@ -36,8 +36,8 @@ target_link_libraries(source_buffer_unittest ${UT_BASE_TARGET}) add_executable(get_last_line_data_unittest GetLastLineDataUnittest.cpp) target_link_libraries(get_last_line_data_unittest ${UT_BASE_TARGET}) -# add_executable(force_read_unittest ForceReadUnittest.cpp) -# target_link_libraries(force_read_unittest ${UT_BASE_TARGET}) +add_executable(force_read_unittest ForceReadUnittest.cpp) +target_link_libraries(force_read_unittest ${UT_BASE_TARGET}) if (UNIX) file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/testDataSet) @@ -55,4 +55,4 @@ gtest_discover_tests(remove_last_incomplete_log_unittest) gtest_discover_tests(log_file_reader_unittest) gtest_discover_tests(source_buffer_unittest) gtest_discover_tests(get_last_line_data_unittest) -# gtest_discover_tests(force_read_unittest) +gtest_discover_tests(force_read_unittest) diff --git a/core/unittest/reader/ForceReadUnittest.cpp b/core/unittest/reader/ForceReadUnittest.cpp index db306614ad..a14e4bf6cc 100644 --- a/core/unittest/reader/ForceReadUnittest.cpp +++ b/core/unittest/reader/ForceReadUnittest.cpp @@ -19,16 +19,16 @@ #include #include -#include "constants/Constants.h" #include "common/FileSystemUtil.h" #include "common/Flags.h" #include "common/JsonUtil.h" #include "config/PipelineConfig.h" +#include "constants/Constants.h" #include "file_server/ConfigManager.h" +#include "file_server/FileServer.h" #include "file_server/event/BlockEventManager.h" #include "file_server/event/Event.h" #include "file_server/event_handler/EventHandler.h" -#include "file_server/FileServer.h" #include "logger/Logger.h" #include "pipeline/Pipeline.h" #include "pipeline/queue/ProcessQueueManager.h" @@ -119,7 +119,17 @@ class ForceReadUnittest : public testing::Test { ProcessQueueManager::GetInstance()->EnablePop(mConfigName); } - void TearDown() override { remove(utf8File.c_str()); } + void TearDown() override { + remove(utf8File.c_str()); + for (auto iter = BlockedEventManager::GetInstance()->mEventMap.begin(); + iter != BlockedEventManager::GetInstance()->mEventMap.end(); + ++iter) { + if (iter->second.mEvent != nullptr) { + delete iter->second.mEvent; + } + } + BlockedEventManager::GetInstance()->mEventMap.clear(); + } private: std::unique_ptr expectedContent; @@ -345,8 +355,6 @@ void ForceReadUnittest::TestAddTimeoutEvent() { reader.InitReader(true, LogFileReader::BACKWARD_TO_BEGINNING); reader.CheckFileSignatureAndOffset(true); LogFileReader::BUFFER_SIZE = 10; - BlockedEventManager::GetInstance()->mEventMap.clear(); - APSARA_TEST_EQUAL_FATAL(BlockedEventManager::GetInstance()->mEventMap.size(), 0U); auto pHanlder = make_unique(mConfigName, mConfig); pHanlder->mReadFileTimeSlice = 0; // force one read for one event From 6a4909add23c22e45247632195d531bb0e88ca66 Mon Sep 17 00:00:00 2001 From: dog Date: Tue, 3 Dec 2024 15:04:25 +0800 Subject: [PATCH 4/6] fix: add mLatestScrapeTime for scrapeTimestamp (#1940) * fix: add mLatestScrapeTime for scrapeTimestamp * chore: add ut * update --- core/prometheus/PrometheusInputRunner.cpp | 12 ++++--- core/prometheus/schedulers/BaseScheduler.cpp | 13 ++++--- core/prometheus/schedulers/BaseScheduler.h | 7 +++- .../prometheus/schedulers/ScrapeScheduler.cpp | 17 +++++---- .../schedulers/TargetSubscriberScheduler.cpp | 6 ++-- .../prometheus/ScrapeSchedulerUnittest.cpp | 35 +++++++++++++++++-- 6 files changed, 70 insertions(+), 20 deletions(-) diff --git a/core/prometheus/PrometheusInputRunner.cpp b/core/prometheus/PrometheusInputRunner.cpp index ca49e35091..052959bb15 100644 --- a/core/prometheus/PrometheusInputRunner.cpp +++ b/core/prometheus/PrometheusInputRunner.cpp @@ -85,10 +85,14 @@ void PrometheusInputRunner::UpdateScrapeInput(std::shared_ptrmUnRegisterMs = mUnRegisterMs.load(); targetSubscriber->SetComponent(mTimer, &mEventPool); - auto randSleepMilliSec = GetRandSleepMilliSec( - targetSubscriber->GetId(), prometheus::RefeshIntervalSeconds, GetCurrentTimeInMilliSeconds()); - auto firstExecTime = std::chrono::steady_clock::now() + std::chrono::milliseconds(randSleepMilliSec); - targetSubscriber->SetFirstExecTime(firstExecTime); + auto currSystemTime = chrono::system_clock::now(); + auto randSleepMilliSec + = GetRandSleepMilliSec(targetSubscriber->GetId(), + prometheus::RefeshIntervalSeconds, + chrono::duration_cast(currSystemTime.time_since_epoch()).count()); + auto firstExecTime = chrono::steady_clock::now() + chrono::milliseconds(randSleepMilliSec); + auto firstSubscribeTime = currSystemTime + chrono::milliseconds(randSleepMilliSec); + targetSubscriber->SetFirstExecTime(firstExecTime, firstSubscribeTime); // 1. add subscriber to mTargetSubscriberSchedulerMap { WriteLock lock(mSubscriberMapRWLock); diff --git a/core/prometheus/schedulers/BaseScheduler.cpp b/core/prometheus/schedulers/BaseScheduler.cpp index af564f1622..e9e7897c35 100644 --- a/core/prometheus/schedulers/BaseScheduler.cpp +++ b/core/prometheus/schedulers/BaseScheduler.cpp @@ -8,20 +8,25 @@ using namespace std; namespace logtail { void BaseScheduler::ExecDone() { mExecCount++; - mLatestExecTime = mFirstExecTime + std::chrono::seconds(mExecCount * mInterval); + mLatestExecTime = mFirstExecTime + chrono::seconds(mExecCount * mInterval); + mLatestScrapeTime = mFirstScrapeTime + chrono::seconds(mExecCount * mInterval); } -std::chrono::steady_clock::time_point BaseScheduler::GetNextExecTime() { +chrono::steady_clock::time_point BaseScheduler::GetNextExecTime() { return mLatestExecTime; } -void BaseScheduler::SetFirstExecTime(std::chrono::steady_clock::time_point firstExecTime) { +void BaseScheduler::SetFirstExecTime(chrono::steady_clock::time_point firstExecTime, + chrono::system_clock::time_point firstScrapeTime) { mFirstExecTime = firstExecTime; mLatestExecTime = mFirstExecTime; + mFirstScrapeTime = firstScrapeTime; + mLatestScrapeTime = mFirstScrapeTime; } void BaseScheduler::DelayExecTime(uint64_t delaySeconds) { - mLatestExecTime = mLatestExecTime + std::chrono::seconds(delaySeconds); + mLatestExecTime = mLatestExecTime + chrono::seconds(delaySeconds); + mLatestScrapeTime = mLatestScrapeTime + chrono::seconds(delaySeconds); } void BaseScheduler::Cancel() { diff --git a/core/prometheus/schedulers/BaseScheduler.h b/core/prometheus/schedulers/BaseScheduler.h index 26739cdcd0..3d57155bd5 100644 --- a/core/prometheus/schedulers/BaseScheduler.h +++ b/core/prometheus/schedulers/BaseScheduler.h @@ -20,7 +20,7 @@ class BaseScheduler { std::chrono::steady_clock::time_point GetNextExecTime(); - void SetFirstExecTime(std::chrono::steady_clock::time_point firstExecTime); + void SetFirstExecTime(std::chrono::steady_clock::time_point firstExecTime,std::chrono::system_clock::time_point firstScrapeTime); void DelayExecTime(uint64_t delaySeconds); virtual void Cancel(); @@ -29,6 +29,11 @@ class BaseScheduler { protected: bool IsCancelled(); + // for scrape monitor + std::chrono::system_clock::time_point mFirstScrapeTime; + std::chrono::system_clock::time_point mLatestScrapeTime; + + // for scheduler std::chrono::steady_clock::time_point mFirstExecTime; std::chrono::steady_clock::time_point mLatestExecTime; int64_t mExecCount = 0; diff --git a/core/prometheus/schedulers/ScrapeScheduler.cpp b/core/prometheus/schedulers/ScrapeScheduler.cpp index 81d59fcf28..07c3291311 100644 --- a/core/prometheus/schedulers/ScrapeScheduler.cpp +++ b/core/prometheus/schedulers/ScrapeScheduler.cpp @@ -86,17 +86,20 @@ ScrapeScheduler::ScrapeScheduler(std::shared_ptr scrapeConfigPtr, mInterval = mScrapeConfigPtr->mScrapeIntervalSeconds; } -void ScrapeScheduler::OnMetricResult(HttpResponse& response, uint64_t timestampMilliSec) { +void ScrapeScheduler::OnMetricResult(HttpResponse& response, uint64_t) { + static double sRate = 0.001; + auto now = GetCurrentTimeInMilliSeconds(); + mScrapeTimestampMilliSec + = chrono::duration_cast(mLatestScrapeTime.time_since_epoch()).count(); + auto scrapeDurationMilliSeconds = now - mScrapeTimestampMilliSec; + auto& responseBody = *response.GetBody(); responseBody.FlushCache(); mSelfMonitor->AddCounter(METRIC_PLUGIN_OUT_EVENTS_TOTAL, response.GetStatusCode()); mSelfMonitor->AddCounter(METRIC_PLUGIN_OUT_SIZE_BYTES, response.GetStatusCode(), responseBody.mRawSize); - mSelfMonitor->AddCounter(METRIC_PLUGIN_PROM_SCRAPE_TIME_MS, - response.GetStatusCode(), - GetCurrentTimeInMilliSeconds() - timestampMilliSec); + mSelfMonitor->AddCounter(METRIC_PLUGIN_PROM_SCRAPE_TIME_MS, response.GetStatusCode(), scrapeDurationMilliSeconds); - mScrapeTimestampMilliSec = timestampMilliSec; - mScrapeDurationSeconds = 1.0 * (GetCurrentTimeInMilliSeconds() - timestampMilliSec) / 1000; + mScrapeDurationSeconds = scrapeDurationMilliSeconds * sRate; mScrapeResponseSizeBytes = responseBody.mRawSize; mUpState = response.GetStatusCode() == 200; if (response.GetStatusCode() != 200) { @@ -114,7 +117,7 @@ void ScrapeScheduler::OnMetricResult(HttpResponse& response, uint64_t timestampM SetAutoMetricMeta(eventGroup); SetTargetLabels(eventGroup); PushEventGroup(std::move(eventGroup)); - mPluginTotalDelayMs->Add(GetCurrentTimeInMilliSeconds() - timestampMilliSec); + mPluginTotalDelayMs->Add(scrapeDurationMilliSeconds); } void ScrapeScheduler::SetAutoMetricMeta(PipelineEventGroup& eGroup) { diff --git a/core/prometheus/schedulers/TargetSubscriberScheduler.cpp b/core/prometheus/schedulers/TargetSubscriberScheduler.cpp index 42381f1229..7745f6dda1 100644 --- a/core/prometheus/schedulers/TargetSubscriberScheduler.cpp +++ b/core/prometheus/schedulers/TargetSubscriberScheduler.cpp @@ -228,7 +228,8 @@ TargetSubscriberScheduler::BuildScrapeSchedulerSet(std::vector& targetGr auto randSleepMilliSec = GetRandSleepMilliSec( scrapeScheduler->GetId(), mScrapeConfigPtr->mScrapeIntervalSeconds, GetCurrentTimeInMilliSeconds()); auto firstExecTime = std::chrono::steady_clock::now() + std::chrono::milliseconds(randSleepMilliSec); - scrapeScheduler->SetFirstExecTime(firstExecTime); + auto firstScrapeTIme = std::chrono::system_clock::now() + std::chrono::milliseconds(randSleepMilliSec); + scrapeScheduler->SetFirstExecTime(firstExecTime, firstScrapeTIme); scrapeScheduler->InitSelfMonitor(mDefaultLabels); scrapeSchedulerMap[scrapeScheduler->GetId()] = scrapeScheduler; @@ -333,7 +334,8 @@ void TargetSubscriberScheduler::InitSelfMonitor(const MetricLabels& defaultLabel mSelfMonitor = std::make_shared(); mSelfMonitor->InitMetricManager(sSubscriberMetricKeys, mDefaultLabels); - WriteMetrics::GetInstance()->PrepareMetricsRecordRef(mMetricsRecordRef, MetricCategory::METRIC_CATEGORY_PLUGIN_SOURCE, std::move(mDefaultLabels)); + WriteMetrics::GetInstance()->PrepareMetricsRecordRef( + mMetricsRecordRef, MetricCategory::METRIC_CATEGORY_PLUGIN_SOURCE, std::move(mDefaultLabels)); mPromSubscriberTargets = mMetricsRecordRef.CreateIntGauge(METRIC_PLUGIN_PROM_SUBSCRIBE_TARGETS); mTotalDelayMs = mMetricsRecordRef.CreateCounter(METRIC_PLUGIN_TOTAL_DELAY_MS); } diff --git a/core/unittest/prometheus/ScrapeSchedulerUnittest.cpp b/core/unittest/prometheus/ScrapeSchedulerUnittest.cpp index 2c5138768b..a5931039fc 100644 --- a/core/unittest/prometheus/ScrapeSchedulerUnittest.cpp +++ b/core/unittest/prometheus/ScrapeSchedulerUnittest.cpp @@ -32,7 +32,6 @@ using namespace std; namespace logtail { - class ScrapeSchedulerUnittest : public testing::Test { public: void TestInitscrapeScheduler(); @@ -42,6 +41,7 @@ class ScrapeSchedulerUnittest : public testing::Test { void TestScheduler(); void TestQueueIsFull(); + void TestExactlyScrape(); protected: void SetUp() override { @@ -220,7 +220,8 @@ void ScrapeSchedulerUnittest::TestQueueIsFull() { EventPool eventPool{true}; event.SetComponent(timer, &eventPool); auto now = std::chrono::steady_clock::now(); - event.SetFirstExecTime(now); + auto nowScrape = std::chrono::system_clock::now(); + event.SetFirstExecTime(now, nowScrape); event.ScheduleNext(); APSARA_TEST_TRUE(timer->mQueue.size() == 1); @@ -235,11 +236,41 @@ void ScrapeSchedulerUnittest::TestQueueIsFull() { APSARA_TEST_EQUAL(now + std::chrono::seconds(1), next->GetExecTime()); } +void ScrapeSchedulerUnittest::TestExactlyScrape() { + Labels labels; + labels.Set(prometheus::ADDRESS_LABEL_NAME, "localhost:8080"); + ScrapeScheduler event(mScrapeConfig, "localhost", 8080, labels, 0, 0); + auto defaultLabels = MetricLabels(); + event.InitSelfMonitor(defaultLabels); + auto timer = make_shared(); + EventPool eventPool{true}; + event.SetComponent(timer, &eventPool); + auto execTime = std::chrono::steady_clock::now(); + auto scrapeTime = std::chrono::system_clock::now(); + event.SetFirstExecTime(execTime, scrapeTime); + + auto firstScrapeTime = event.mLatestScrapeTime; + event.ExecDone(); + auto secondScrapeTime = event.mLatestScrapeTime; + event.ExecDone(); + event.DelayExecTime(1); + auto thirdScrapeTime = event.mLatestScrapeTime; + event.ExecDone(); + auto fourthScrapeTime = event.mLatestScrapeTime; + APSARA_TEST_EQUAL(firstScrapeTime, scrapeTime); + APSARA_TEST_EQUAL(secondScrapeTime - firstScrapeTime, std::chrono::seconds(mScrapeConfig->mScrapeIntervalSeconds)); + APSARA_TEST_EQUAL(thirdScrapeTime - firstScrapeTime, + std::chrono::seconds(mScrapeConfig->mScrapeIntervalSeconds * 2 + 1)); + APSARA_TEST_EQUAL(fourthScrapeTime - firstScrapeTime, + std::chrono::seconds(mScrapeConfig->mScrapeIntervalSeconds * 3)); +} + UNIT_TEST_CASE(ScrapeSchedulerUnittest, TestInitscrapeScheduler) UNIT_TEST_CASE(ScrapeSchedulerUnittest, TestProcess) UNIT_TEST_CASE(ScrapeSchedulerUnittest, TestStreamMetricWriteCallback) UNIT_TEST_CASE(ScrapeSchedulerUnittest, TestScheduler) UNIT_TEST_CASE(ScrapeSchedulerUnittest, TestQueueIsFull) +UNIT_TEST_CASE(ScrapeSchedulerUnittest, TestExactlyScrape) } // namespace logtail From 293891f8af5b7b94b42fe2b943e92f7184a41ec9 Mon Sep 17 00:00:00 2001 From: Takuka0311 <1914426213@qq.com> Date: Tue, 3 Dec 2024 15:11:44 +0800 Subject: [PATCH 5/6] feat: add builtin pipelines (#1936) * add internal config provedier * polish * polish * polish * polish --- core/config/watcher/PipelineConfigWatcher.cpp | 75 ++++++++++--------- core/config/watcher/PipelineConfigWatcher.h | 2 +- core/monitor/Monitor.cpp | 47 ------------ core/monitor/Monitor.h | 5 -- .../config/CommonConfigProviderUnittest.cpp | 19 ++--- core/unittest/config/ConfigUpdateUnittest.cpp | 3 +- .../unittest/config/ConfigWatcherUnittest.cpp | 7 +- 7 files changed, 56 insertions(+), 102 deletions(-) diff --git a/core/config/watcher/PipelineConfigWatcher.cpp b/core/config/watcher/PipelineConfigWatcher.cpp index 7c91068b72..d68d7cedf3 100644 --- a/core/config/watcher/PipelineConfigWatcher.cpp +++ b/core/config/watcher/PipelineConfigWatcher.cpp @@ -18,6 +18,10 @@ #include "common/FileSystemUtil.h" #include "config/ConfigUtil.h" +#include "config/common_provider/CommonConfigProvider.h" +#ifdef __ENTERPRISE__ +#include "config/provider/EnterpriseConfigProvider.h" +#endif #include "logger/Logger.h" #include "monitor/Monitor.h" #include "pipeline/PipelineManager.h" @@ -37,9 +41,9 @@ pair PipelineConfigWatcher::CheckConfigDiff( PipelineConfigDiff pDiff; TaskConfigDiff tDiff; unordered_set configSet; - // inner configs - InsertInnerPipelines(pDiff, tDiff, configSet); - // configs from file + // builtin pipeline configs + InsertBuiltInPipelines(pDiff, tDiff, configSet); + // file pipeline configs InsertPipelines(pDiff, tDiff, configSet); for (const auto& name : mPipelineManager->GetAllConfigNames()) { @@ -83,87 +87,90 @@ pair PipelineConfigWatcher::CheckConfigDiff( return make_pair(std::move(pDiff), std::move(tDiff)); } -void PipelineConfigWatcher::InsertInnerPipelines(PipelineConfigDiff& pDiff, +void PipelineConfigWatcher::InsertBuiltInPipelines(PipelineConfigDiff& pDiff, TaskConfigDiff& tDiff, unordered_set& configSet) { - std::map innerPipelines; - // self-monitor metric - innerPipelines[LoongCollectorMonitor::GetInnerSelfMonitorMetricPipelineName()] - = LoongCollectorMonitor::GetInnerSelfMonitorMetricPipeline(); +#ifdef __ENTERPRISE__ + const std::map& builtInPipelines + = EnterpriseConfigProvider::GetInstance()->GetAllBuiltInPipelineConfigs(); - // process - for (const auto& pipeline : innerPipelines) { - if (configSet.find(pipeline.first) != configSet.end()) { + for (const auto& pipeline : builtInPipelines) { + const string& pipelineName = pipeline.first; + const string& pipleineDetail = pipeline.second; + if (configSet.find(pipelineName) != configSet.end()) { LOG_WARNING(sLogger, ("more than 1 config with the same name is found", "skip current config")("inner pipeline", - pipeline.first)); + pipelineName)); continue; } - configSet.insert(pipeline.first); + configSet.insert(pipelineName); string errorMsg; - auto iter = mInnerConfigMap.find(pipeline.first); + auto iter = mInnerConfigMap.find(pipelineName); if (iter == mInnerConfigMap.end()) { - mInnerConfigMap[pipeline.first] = pipeline.second; + mInnerConfigMap[pipelineName] = pipleineDetail; unique_ptr detail = make_unique(); - if (!ParseConfigDetail(pipeline.second, ".json", *detail, errorMsg)) { + if (!ParseConfigDetail(pipleineDetail, ".json", *detail, errorMsg)) { LOG_WARNING(sLogger, ("config format error", "skip current object")("error msg", errorMsg)("inner pipeline", - pipeline.first)); + pipelineName)); continue; } - if (!IsConfigEnabled(pipeline.first, *detail)) { - LOG_INFO(sLogger, ("new config found and disabled", "skip current object")("config", pipeline.first)); + if (!IsConfigEnabled(pipelineName, *detail)) { + LOG_INFO(sLogger, ("new config found and disabled", "skip current object")("config", pipelineName)); continue; } - if (!CheckAddedConfig(pipeline.first, std::move(detail), pDiff, tDiff)) { + if (!CheckAddedConfig(pipelineName, std::move(detail), pDiff, tDiff)) { continue; } - } else if (pipeline.second != iter->second) { - mInnerConfigMap[pipeline.first] = pipeline.second; + } else if (pipleineDetail != iter->second) { + mInnerConfigMap[pipelineName] = pipleineDetail; unique_ptr detail = make_unique(); - if (!ParseConfigDetail(pipeline.second, ".json", *detail, errorMsg)) { + if (!ParseConfigDetail(pipleineDetail, ".json", *detail, errorMsg)) { LOG_WARNING(sLogger, ("config format error", "skip current object")("error msg", errorMsg)("inner pipeline", - pipeline.first)); + pipelineName)); continue; } - if (!IsConfigEnabled(pipeline.first, *detail)) { + if (!IsConfigEnabled(pipelineName, *detail)) { switch (GetConfigType(*detail)) { case ConfigType::Pipeline: - if (mPipelineManager->FindConfigByName(pipeline.first)) { - pDiff.mRemoved.push_back(pipeline.first); + if (mPipelineManager->FindConfigByName(pipelineName)) { + pDiff.mRemoved.push_back(pipelineName); LOG_INFO(sLogger, ("existing valid config modified and disabled", - "prepare to stop current running pipeline")("config", pipeline.first)); + "prepare to stop current running pipeline")("config", pipelineName)); } else { LOG_INFO(sLogger, ("existing invalid config modified and disabled", - "skip current object")("config", pipeline.first)); + "skip current object")("config", pipelineName)); } break; case ConfigType::Task: - if (mTaskPipelineManager->FindPipelineByName(pipeline.first)) { - tDiff.mRemoved.push_back(pipeline.first); + if (mTaskPipelineManager->FindPipelineByName(pipelineName)) { + tDiff.mRemoved.push_back(pipelineName); LOG_INFO(sLogger, ("existing valid config modified and disabled", - "prepare to stop current running task")("config", pipeline.first)); + "prepare to stop current running task")("config", pipelineName)); } else { LOG_INFO(sLogger, ("existing invalid config modified and disabled", - "skip current object")("config", pipeline.first)); + "skip current object")("config", pipelineName)); } break; } continue; } - if (!CheckModifiedConfig(pipeline.first, std::move(detail), pDiff, tDiff)) { + if (!CheckModifiedConfig(pipelineName, std::move(detail), pDiff, tDiff)) { continue; } } else { LOG_DEBUG(sLogger, ("existing inner config unchanged", "skip current object")); } } +#else + return; +#endif } void PipelineConfigWatcher::InsertPipelines(PipelineConfigDiff& pDiff, diff --git a/core/config/watcher/PipelineConfigWatcher.h b/core/config/watcher/PipelineConfigWatcher.h index 28a7f00f97..98c2677264 100644 --- a/core/config/watcher/PipelineConfigWatcher.h +++ b/core/config/watcher/PipelineConfigWatcher.h @@ -46,7 +46,7 @@ class PipelineConfigWatcher : public ConfigWatcher { PipelineConfigWatcher(); ~PipelineConfigWatcher() = default; - void InsertInnerPipelines(PipelineConfigDiff& pDiff, TaskConfigDiff& tDiff, std::unordered_set& configSet); + void InsertBuiltInPipelines(PipelineConfigDiff& pDiff, TaskConfigDiff& tDiff, std::unordered_set& configSet); void InsertPipelines(PipelineConfigDiff& pDiff, TaskConfigDiff& tDiff, std::unordered_set& configSet); bool CheckAddedConfig(const std::string& configName, std::unique_ptr&& configDetail, diff --git a/core/monitor/Monitor.cpp b/core/monitor/Monitor.cpp index 21a51952de..5d9f40e74e 100644 --- a/core/monitor/Monitor.cpp +++ b/core/monitor/Monitor.cpp @@ -759,51 +759,4 @@ void LoongCollectorMonitor::Stop() { } -const string LoongCollectorMonitor::GetInnerSelfMonitorMetricPipeline() { -#ifdef __ENTERPRISE__ - static string pipeline = ""; -#else - static string pipeline = R"( - { - "inputs": [ - { - "Type": "input_internal_metrics", - "Agent": { - "Enable": false, - "Interval": 1 - }, - "Runner": { - "Enable": false, - "Interval": 1 - }, - "Pipeline": { - "Enable": true, - "Interval": 1 - }, - "PluginSource": { - "Enable": true, - "Interval": 10 - }, - "Plugin": { - "Enable": false, - "Interval": 10 - }, - "Component": { - "Enable": false, - "Interval": 10 - } - } - ], - "flushers": [ - { - "Type": "flusher_file", - "FilePath": "./log/self_metrics.log" - } - ] - } - )"; -#endif - return pipeline; -} - } // namespace logtail diff --git a/core/monitor/Monitor.h b/core/monitor/Monitor.h index 372407d8f1..c66a47218e 100644 --- a/core/monitor/Monitor.h +++ b/core/monitor/Monitor.h @@ -192,11 +192,6 @@ class LoongCollectorMonitor { void Init(); void Stop(); - static const std::string GetInnerSelfMonitorAlarmPipelineName() { return ""; } - static const std::string GetInnerSelfMonitorAlarmPipeline() { return ""; } - static const std::string GetInnerSelfMonitorMetricPipelineName() { return "inner-self-monitor-metric-pipeline"; } - static const std::string GetInnerSelfMonitorMetricPipeline(); - void SetAgentCpu(double cpu) { mAgentCpu->Set(cpu); } void SetAgentMemory(uint64_t mem) { mAgentMemory->Set(mem); } void SetAgentGoMemory(uint64_t mem) { mAgentGoMemory->Set(mem); } diff --git a/core/unittest/config/CommonConfigProviderUnittest.cpp b/core/unittest/config/CommonConfigProviderUnittest.cpp index c08df532a7..451880ceb4 100644 --- a/core/unittest/config/CommonConfigProviderUnittest.cpp +++ b/core/unittest/config/CommonConfigProviderUnittest.cpp @@ -435,20 +435,17 @@ void CommonConfigProviderUnittest::TestGetConfigUpdateAndConfigWatcher() { auto pipelineConfigDiff = PipelineConfigWatcher::GetInstance()->CheckConfigDiff(); PipelineManager::GetInstance()->UpdatePipelines(pipelineConfigDiff.first); APSARA_TEST_TRUE(!pipelineConfigDiff.first.IsEmpty()); - APSARA_TEST_EQUAL(2U, pipelineConfigDiff.first.mAdded.size()); - APSARA_TEST_EQUAL(pipelineConfigDiff.first.mAdded[0].mName, LoongCollectorMonitor::GetInnerSelfMonitorMetricPipelineName()); - APSARA_TEST_EQUAL(pipelineConfigDiff.first.mAdded[1].mName, "config1"); - APSARA_TEST_EQUAL(PipelineManager::GetInstance()->GetAllConfigNames().size(), 2); - APSARA_TEST_EQUAL(PipelineManager::GetInstance()->GetAllConfigNames()[0], "config1"); - APSARA_TEST_EQUAL(PipelineManager::GetInstance()->GetAllConfigNames()[1], LoongCollectorMonitor::GetInnerSelfMonitorMetricPipelineName()); + APSARA_TEST_EQUAL(1U, pipelineConfigDiff.first.mAdded.size()); + APSARA_TEST_EQUAL(pipelineConfigDiff.first.mAdded[0].mName, "config1"); + APSARA_TEST_EQUAL(PipelineManager::GetInstance()->GetAllConfigNames().size(), 1); + APSARA_TEST_TRUE(PipelineManager::GetInstance()->FindConfigByName("config1").get() != nullptr); // 再次处理 pipelineconfig pipelineConfigDiff = PipelineConfigWatcher::GetInstance()->CheckConfigDiff(); PipelineManager::GetInstance()->UpdatePipelines(pipelineConfigDiff.first); APSARA_TEST_TRUE(pipelineConfigDiff.first.IsEmpty()); APSARA_TEST_TRUE(pipelineConfigDiff.first.mAdded.empty()); - APSARA_TEST_EQUAL(PipelineManager::GetInstance()->GetAllConfigNames().size(), 2); - APSARA_TEST_EQUAL(PipelineManager::GetInstance()->GetAllConfigNames()[0], "config1"); - APSARA_TEST_EQUAL(PipelineManager::GetInstance()->GetAllConfigNames()[1], LoongCollectorMonitor::GetInnerSelfMonitorMetricPipelineName()); + APSARA_TEST_EQUAL(PipelineManager::GetInstance()->GetAllConfigNames().size(), 1); + APSARA_TEST_TRUE(PipelineManager::GetInstance()->FindConfigByName("config1").get() != nullptr); APSARA_TEST_EQUAL(provider.mInstanceConfigInfoMap.size(), 2); @@ -656,13 +653,13 @@ void CommonConfigProviderUnittest::TestGetConfigUpdateAndConfigWatcher() { APSARA_TEST_TRUE(!pipelineConfigDiff.first.IsEmpty()); APSARA_TEST_EQUAL(1U, pipelineConfigDiff.first.mRemoved.size()); APSARA_TEST_EQUAL(pipelineConfigDiff.first.mRemoved[0], "config1"); - APSARA_TEST_EQUAL(1U, PipelineManager::GetInstance()->GetAllConfigNames().size()); + APSARA_TEST_EQUAL(0U, PipelineManager::GetInstance()->GetAllConfigNames().size()); // 再次处理pipelineConfigDiff pipelineConfigDiff = PipelineConfigWatcher::GetInstance()->CheckConfigDiff(); PipelineManager::GetInstance()->UpdatePipelines(pipelineConfigDiff.first); APSARA_TEST_TRUE(pipelineConfigDiff.first.IsEmpty()); APSARA_TEST_TRUE(pipelineConfigDiff.first.mRemoved.empty()); - APSARA_TEST_EQUAL(1U, PipelineManager::GetInstance()->GetAllConfigNames().size()); + APSARA_TEST_EQUAL(0U, PipelineManager::GetInstance()->GetAllConfigNames().size()); APSARA_TEST_TRUE(provider.mInstanceConfigInfoMap.empty()); // 处理instanceConfigDiff diff --git a/core/unittest/config/ConfigUpdateUnittest.cpp b/core/unittest/config/ConfigUpdateUnittest.cpp index c3daca67b8..341f48f00d 100644 --- a/core/unittest/config/ConfigUpdateUnittest.cpp +++ b/core/unittest/config/ConfigUpdateUnittest.cpp @@ -19,6 +19,7 @@ #include #include "config/PipelineConfig.h" +#include "config/common_provider/CommonConfigProvider.h" #include "config/watcher/PipelineConfigWatcher.h" #include "pipeline/Pipeline.h" #include "pipeline/PipelineManager.h" @@ -267,7 +268,7 @@ class ConfigUpdateUnittest : public testing::Test { void ConfigUpdateUnittest::OnStartUp() const { auto diff = PipelineConfigWatcher::GetInstance()->CheckConfigDiff(); - APSARA_TEST_EQUAL(1U, diff.first.mAdded.size()); + APSARA_TEST_EQUAL(0U, diff.first.mAdded.size()); APSARA_TEST_TRUE(diff.second.IsEmpty()); GenerateInitialConfigs(); diff --git a/core/unittest/config/ConfigWatcherUnittest.cpp b/core/unittest/config/ConfigWatcherUnittest.cpp index 975e19974d..8ed7327cbc 100644 --- a/core/unittest/config/ConfigWatcherUnittest.cpp +++ b/core/unittest/config/ConfigWatcherUnittest.cpp @@ -16,6 +16,7 @@ #include #include "config/ConfigDiff.h" +#include "config/common_provider/CommonConfigProvider.h" #include "config/watcher/InstanceConfigWatcher.h" #include "config/watcher/PipelineConfigWatcher.h" #include "pipeline/plugin/PluginRegistry.h" @@ -50,7 +51,7 @@ const filesystem::path ConfigWatcherUnittest::instanceConfigDir = "./instance_co void ConfigWatcherUnittest::InvalidConfigDirFound() const { { auto diff = PipelineConfigWatcher::GetInstance()->CheckConfigDiff(); - APSARA_TEST_EQUAL(1U, diff.first.mAdded.size()); + APSARA_TEST_EQUAL(0U, diff.first.mAdded.size()); APSARA_TEST_TRUE(diff.second.IsEmpty()); { ofstream fout("continuous_pipeline_config"); } @@ -82,7 +83,7 @@ void ConfigWatcherUnittest::InvalidConfigFileFound() const { fout << "[}"; } auto diff = PipelineConfigWatcher::GetInstance()->CheckConfigDiff(); - APSARA_TEST_EQUAL(1U, diff.first.mAdded.size()); + APSARA_TEST_EQUAL(0U, diff.first.mAdded.size()); APSARA_TEST_TRUE(diff.second.IsEmpty()); filesystem::remove_all(configDir); } @@ -132,7 +133,7 @@ void ConfigWatcherUnittest::DuplicateConfigs() const { { ofstream fout("dir2/config.json"); } auto diff = PipelineConfigWatcher::GetInstance()->CheckConfigDiff(); APSARA_TEST_FALSE(diff.first.IsEmpty()); - APSARA_TEST_EQUAL(2U, diff.first.mAdded.size()); + APSARA_TEST_EQUAL(1U, diff.first.mAdded.size()); filesystem::remove_all("dir1"); filesystem::remove_all("dir2"); From 0254c5cc72f6485d6c358ba1c6c1105a7f066e7d Mon Sep 17 00:00:00 2001 From: Takuka0311 <1914426213@qq.com> Date: Tue, 3 Dec 2024 15:20:28 +0800 Subject: [PATCH 6/6] Refactor self-monitor directory structure and remove status_profile (#1932) * init * polish * polish * polish * polish --- core/CMakeLists.txt | 2 +- core/app_config/AppConfig.cpp | 17 -- core/app_config/AppConfig.h | 2 - core/application/Application.cpp | 3 - core/ebpf/SelfMonitor.h | 4 +- core/ebpf/eBPFServer.cpp | 2 +- core/ebpf/eBPFServer.h | 2 +- core/ebpf/handler/AbstractHandler.h | 2 +- core/file_server/FileServer.h | 2 +- core/file_server/event_handler/LogInput.cpp | 15 +- core/file_server/polling/PollingDirFile.cpp | 12 +- core/file_server/polling/PollingModify.cpp | 4 +- core/logger/Logger.cpp | 8 - core/monitor/MetricManager.cpp | 158 ------------- core/monitor/MetricManager.h | 47 +--- core/monitor/MetricStore.cpp | 81 ------- core/monitor/MetricStore.h | 58 ----- core/monitor/Monitor.cpp | 123 +---------- core/monitor/Monitor.h | 7 +- .../{ => metric_models}/MetricRecord.cpp | 0 .../{ => metric_models}/MetricRecord.h | 0 .../monitor/{ => metric_models}/MetricTypes.h | 0 .../ReentrantMetricsRecord.cpp} | 2 +- .../ReentrantMetricsRecord.h} | 0 .../metric_models/SelfMonitorMetricEvent.cpp | 187 ++++++++++++++++ .../metric_models/SelfMonitorMetricEvent.h | 69 ++++++ .../plugin/instance/FlusherInstance.h | 2 +- core/plugin/input/InputContainerStdio.h | 2 +- core/plugin/input/InputFile.h | 2 +- core/plugin/input/InputFileSecurity.h | 2 +- core/plugin/input/InputNetworkObserver.cpp | 2 +- core/plugin/input/InputNetworkObserver.h | 2 +- core/plugin/input/InputNetworkSecurity.h | 2 +- core/plugin/input/InputProcessSecurity.h | 2 +- core/prometheus/PromSelfMonitor.cpp | 2 +- core/prometheus/PromSelfMonitor.h | 2 +- core/prometheus/PrometheusInputRunner.h | 2 +- core/prometheus/schedulers/ScrapeScheduler.h | 2 +- core/unittest/monitor/CMakeLists.txt | 4 + .../monitor/PluginMetricManagerUnittest.cpp | 2 +- .../SelfMonitorMetricEventUnittest.cpp | 207 ++++++++++++++++++ 41 files changed, 500 insertions(+), 544 deletions(-) delete mode 100644 core/monitor/MetricStore.cpp delete mode 100644 core/monitor/MetricStore.h rename core/monitor/{ => metric_models}/MetricRecord.cpp (100%) rename core/monitor/{ => metric_models}/MetricRecord.h (100%) rename core/monitor/{ => metric_models}/MetricTypes.h (100%) rename core/monitor/{PluginMetricManager.cpp => metric_models/ReentrantMetricsRecord.cpp} (99%) rename core/monitor/{PluginMetricManager.h => metric_models/ReentrantMetricsRecord.h} (100%) create mode 100644 core/monitor/metric_models/SelfMonitorMetricEvent.cpp create mode 100644 core/monitor/metric_models/SelfMonitorMetricEvent.h create mode 100644 core/unittest/monitor/SelfMonitorMetricEventUnittest.cpp diff --git a/core/CMakeLists.txt b/core/CMakeLists.txt index b1b91aaf73..3dd6af2c93 100644 --- a/core/CMakeLists.txt +++ b/core/CMakeLists.txt @@ -114,7 +114,7 @@ include(${CMAKE_CURRENT_SOURCE_DIR}/plugin/flusher/links.cmake) # Subdirectories (modules). except for common, input, processor, flusher, observer, helper, spl, and provider. set(SUB_DIRECTORIES_LIST - application app_config checkpoint container_manager metadata logger go_pipeline monitor monitor/metric_constants monitor/profile_sender models + application app_config checkpoint container_manager metadata logger go_pipeline monitor monitor/metric_constants monitor/metric_models monitor/profile_sender models config config/watcher constants pipeline pipeline/batch pipeline/limiter pipeline/plugin pipeline/plugin/creator pipeline/plugin/instance pipeline/plugin/interface pipeline/queue pipeline/route pipeline/serializer task_pipeline diff --git a/core/app_config/AppConfig.cpp b/core/app_config/AppConfig.cpp index d492af6b7b..202d9f67f1 100644 --- a/core/app_config/AppConfig.cpp +++ b/core/app_config/AppConfig.cpp @@ -430,22 +430,6 @@ string GetAgentLogName() { } } -string GetAgentSnapshotDir() { - if (BOOL_FLAG(logtail_mode)) { - return GetProcessExecutionDir() + STRING_FLAG(logtail_snapshot_dir); - } else { - return GetAgentLogDir() + "snapshot"; - } -} - -string GetAgentStatusLogName() { - if (BOOL_FLAG(logtail_mode)) { - return "ilogtail_status.LOG"; - } else { - return "loongcollector_status.LOG"; - } -} - string GetObserverEbpfHostPath() { if (BOOL_FLAG(logtail_mode)) { return STRING_FLAG(sls_observer_ebpf_host_path); @@ -909,7 +893,6 @@ void AppConfig::LoadResourceConf(const Json::Value& confJson) { mSendRequestConcurrency = confJson["send_request_concurrency"].asInt(); else mSendRequestConcurrency = INT32_FLAG(send_request_concurrency); - LogtailMonitor::GetInstance()->UpdateConstMetric("send_request_concurrency", mSendRequestConcurrency); if (confJson.isMember("process_thread_count") && confJson["process_thread_count"].isInt()) mProcessThreadCount = confJson["process_thread_count"].asInt(); diff --git a/core/app_config/AppConfig.h b/core/app_config/AppConfig.h index 25cdb0fdcb..a1f0af7ec5 100644 --- a/core/app_config/AppConfig.h +++ b/core/app_config/AppConfig.h @@ -48,8 +48,6 @@ std::string GetLocalEventDataFileName(); std::string GetInotifyWatcherDirsDumpFileName(); std::string GetAgentLoggersPrefix(); std::string GetAgentLogName(); -std::string GetAgentSnapshotDir(); -std::string GetAgentStatusLogName(); std::string GetObserverEbpfHostPath(); std::string GetSendBufferFileNamePrefix(); std::string GetLegacyUserLocalConfigFilePath(); diff --git a/core/application/Application.cpp b/core/application/Application.cpp index 09901cced9..34564e238a 100644 --- a/core/application/Application.cpp +++ b/core/application/Application.cpp @@ -133,7 +133,6 @@ void Application::Init() { const string& configIP = AppConfig::GetInstance()->GetConfigIP(); if (!configIP.empty()) { LoongCollectorMonitor::mIpAddr = configIP; - LogtailMonitor::GetInstance()->UpdateConstMetric("logtail_ip", GetHostIp()); } else if (!interface.empty()) { LoongCollectorMonitor::mIpAddr = GetHostIp(interface); if (LoongCollectorMonitor::mIpAddr.empty()) { @@ -151,7 +150,6 @@ void Application::Init() { const string& configHostName = AppConfig::GetInstance()->GetConfigHostName(); if (!configHostName.empty()) { LoongCollectorMonitor::mHostname = configHostName; - LogtailMonitor::GetInstance()->UpdateConstMetric("logtail_hostname", GetHostName()); } GenerateInstanceId(); @@ -198,7 +196,6 @@ void Application::Init() { void Application::Start() { // GCOVR_EXCL_START LoongCollectorMonitor::mStartTime = GetTimeStamp(time(NULL), "%Y-%m-%d %H:%M:%S"); - LogtailMonitor::GetInstance()->UpdateConstMetric("start_time", LoongCollectorMonitor::mStartTime); #if defined(__ENTERPRISE__) && defined(_MSC_VER) InitWindowsSignalObject(); diff --git a/core/ebpf/SelfMonitor.h b/core/ebpf/SelfMonitor.h index 48847f8229..7189f3e9c4 100644 --- a/core/ebpf/SelfMonitor.h +++ b/core/ebpf/SelfMonitor.h @@ -19,9 +19,9 @@ #include #include "ebpf/include/export.h" -#include "monitor/PluginMetricManager.h" +#include "monitor/metric_models/ReentrantMetricsRecord.h" #include "common/Lock.h" -#include "monitor/MetricTypes.h" +#include "monitor/metric_models/MetricTypes.h" #include "monitor/metric_constants/MetricConstants.h" namespace logtail { diff --git a/core/ebpf/eBPFServer.cpp b/core/ebpf/eBPFServer.cpp index 501f833806..c87c14f63b 100644 --- a/core/ebpf/eBPFServer.cpp +++ b/core/ebpf/eBPFServer.cpp @@ -25,7 +25,7 @@ #include "ebpf/include/export.h" #include "common/LogtailCommonFlags.h" #include "common/MachineInfoUtil.h" -#include "monitor/PluginMetricManager.h" +#include "monitor/metric_models/ReentrantMetricsRecord.h" #include "common/Lock.h" DEFINE_FLAG_INT64(kernel_min_version_for_ebpf, diff --git a/core/ebpf/eBPFServer.h b/core/ebpf/eBPFServer.h index 24f79347ea..0f21fcdf07 100644 --- a/core/ebpf/eBPFServer.h +++ b/core/ebpf/eBPFServer.h @@ -29,7 +29,7 @@ #include "ebpf/handler/AbstractHandler.h" #include "ebpf/handler/ObserveHandler.h" #include "ebpf/handler/SecurityHandler.h" -#include "monitor/MetricTypes.h" +#include "monitor/metric_models/MetricTypes.h" #include "ebpf/SelfMonitor.h" namespace logtail { diff --git a/core/ebpf/handler/AbstractHandler.h b/core/ebpf/handler/AbstractHandler.h index b71924be33..140efb83c5 100644 --- a/core/ebpf/handler/AbstractHandler.h +++ b/core/ebpf/handler/AbstractHandler.h @@ -17,7 +17,7 @@ #include #include "pipeline/PipelineContext.h" -#include "monitor/MetricTypes.h" +#include "monitor/metric_models/MetricTypes.h" #include "monitor/MetricManager.h" namespace logtail{ diff --git a/core/file_server/FileServer.h b/core/file_server/FileServer.h index ec539dabdf..e7c30e14ba 100644 --- a/core/file_server/FileServer.h +++ b/core/file_server/FileServer.h @@ -25,7 +25,7 @@ #include "file_server/MultilineOptions.h" #include "file_server/reader/FileReaderOptions.h" #include "monitor/MetricManager.h" -#include "monitor/PluginMetricManager.h" +#include "monitor/metric_models/ReentrantMetricsRecord.h" #include "pipeline/PipelineContext.h" diff --git a/core/file_server/event_handler/LogInput.cpp b/core/file_server/event_handler/LogInput.cpp index 6765717922..340a6b6763 100644 --- a/core/file_server/event_handler/LogInput.cpp +++ b/core/file_server/event_handler/LogInput.cpp @@ -349,21 +349,10 @@ void LogInput::ProcessEvent(EventDispatcher* dispatcher, Event* ev) { } void LogInput::UpdateCriticalMetric(int32_t curTime) { - LogtailMonitor::GetInstance()->UpdateMetric("last_read_event_time", - GetTimeStamp(mLastReadEventTime, "%Y-%m-%d %H:%M:%S")); mLastRunTime->Set(mLastReadEventTime.load()); - - LogtailMonitor::GetInstance()->UpdateMetric("event_tps", - 1.0 * mEventProcessCount / (curTime - mLastUpdateMetricTime)); - int32_t openFdTotal = GloablFileDescriptorManager::GetInstance()->GetOpenedFilePtrSize(); - LogtailMonitor::GetInstance()->UpdateMetric("open_fd", openFdTotal); - LoongCollectorMonitor::GetInstance()->SetAgentOpenFdTotal(openFdTotal); - size_t handlerCount = EventDispatcher::GetInstance()->GetHandlerCount(); - LogtailMonitor::GetInstance()->UpdateMetric("register_handler", handlerCount); - mRegisterdHandlersTotal->Set(handlerCount); - LogtailMonitor::GetInstance()->UpdateMetric("reader_count", CheckPointManager::Instance()->GetReaderCount()); + LoongCollectorMonitor::GetInstance()->SetAgentOpenFdTotal(GloablFileDescriptorManager::GetInstance()->GetOpenedFilePtrSize()); + mRegisterdHandlersTotal->Set(EventDispatcher::GetInstance()->GetHandlerCount()); mActiveReadersTotal->Set(CheckPointManager::Instance()->GetReaderCount()); - LogtailMonitor::GetInstance()->UpdateMetric("multi_config", AppConfig::GetInstance()->IsAcceptMultiConfig()); mEventProcessCount = 0; } diff --git a/core/file_server/polling/PollingDirFile.cpp b/core/file_server/polling/PollingDirFile.cpp index 0b106605ff..f0f37b120e 100644 --- a/core/file_server/polling/PollingDirFile.cpp +++ b/core/file_server/polling/PollingDirFile.cpp @@ -160,17 +160,11 @@ void PollingDirFile::PollingIteration() { } sort(sortedConfigs.begin(), sortedConfigs.end(), FileDiscoveryOptions::CompareByPathLength); - size_t configTotal = nameConfigMap.size(); - LogtailMonitor::GetInstance()->UpdateMetric("config_count", configTotal); - LoongCollectorMonitor::GetInstance()->SetAgentConfigTotal(configTotal); + LoongCollectorMonitor::GetInstance()->SetAgentConfigTotal(nameConfigMap.size()); { ScopedSpinLock lock(mCacheLock); - size_t pollingDirCacheSize = mDirCacheMap.size(); - LogtailMonitor::GetInstance()->UpdateMetric("polling_dir_cache", pollingDirCacheSize); - mPollingDirCacheSize->Set(pollingDirCacheSize); - size_t pollingFileCacheSize = mFileCacheMap.size(); - LogtailMonitor::GetInstance()->UpdateMetric("polling_file_cache", pollingFileCacheSize); - mPollingFileCacheSize->Set(pollingFileCacheSize); + mPollingDirCacheSize->Set(mDirCacheMap.size()); + mPollingFileCacheSize->Set(mFileCacheMap.size()); } // Iterate all normal configs, make sure stat count will not exceed limit. diff --git a/core/file_server/polling/PollingModify.cpp b/core/file_server/polling/PollingModify.cpp index 33b7f3fad8..fcb2bb8eff 100644 --- a/core/file_server/polling/PollingModify.cpp +++ b/core/file_server/polling/PollingModify.cpp @@ -262,9 +262,7 @@ void PollingModify::PollingIteration() { vector deletedFileVec; vector pollingEventVec; int32_t statCount = 0; - size_t pollingModifySizeTotal = mModifyCacheMap.size(); - LogtailMonitor::GetInstance()->UpdateMetric("polling_modify_size", pollingModifySizeTotal); - mPollingModifySize->Set(pollingModifySizeTotal); + mPollingModifySize->Set(mModifyCacheMap.size()); for (auto iter = mModifyCacheMap.begin(); iter != mModifyCacheMap.end(); ++iter) { if (!mRuningFlag || mHoldOnFlag) break; diff --git a/core/logger/Logger.cpp b/core/logger/Logger.cpp index 701fa2738d..237a33511b 100644 --- a/core/logger/Logger.cpp +++ b/core/logger/Logger.cpp @@ -414,14 +414,6 @@ void Logger::LoadAllDefaultConfigs(std::map& loggerCf LoadDefaultConfig(loggerCfgs, sinkCfgs); loggerCfgs.insert({GetAgentLoggersPrefix(), LoggerConfig{"AsyncFileSink", level::info}}); - loggerCfgs.insert({GetAgentLoggersPrefix() + "/status", LoggerConfig{"AsyncFileSinkStatus", level::info}}); - - std::string dirPath = GetAgentSnapshotDir(); - if (!Mkdir(dirPath)) { - LogMsg(std::string("Create snapshot dir error ") + dirPath + ", error" + ErrnoToString(GetErrno())); - } - sinkCfgs.insert( - {"AsyncFileSinkStatus", SinkConfig{"AsyncFile", 61, 1, 1, dirPath + PATH_SEPARATOR + GetAgentStatusLogName()}}); } void Logger::EnsureSnapshotDirExist(std::map& sinkCfgs) { diff --git a/core/monitor/MetricManager.cpp b/core/monitor/MetricManager.cpp index be425a9ce7..21aa50c68a 100644 --- a/core/monitor/MetricManager.cpp +++ b/core/monitor/MetricManager.cpp @@ -18,10 +18,6 @@ #include "Monitor.h" #include "app_config/AppConfig.h" -#include "common/HashUtil.h" -#include "common/JsonUtil.h" -#include "common/StringTools.h" -#include "common/TimeUtil.h" #include "go_pipeline/LogtailPlugin.h" #include "logger/Logger.h" #include "provider/Provider.h" @@ -37,159 +33,6 @@ const string METRIC_KEY_LABEL = "label"; const string METRIC_TOPIC_TYPE = "loongcollector_metric"; const string METRIC_EXPORT_TYPE_GO = "direct"; const string METRIC_EXPORT_TYPE_CPP = "cpp_provided"; -const string METRIC_GO_KEY_LABELS = "labels"; -const string METRIC_GO_KEY_COUNTERS = "counters"; -const string METRIC_GO_KEY_GAUGES = "gauges"; - -SelfMonitorMetricEvent::SelfMonitorMetricEvent() { -} - -SelfMonitorMetricEvent::SelfMonitorMetricEvent(MetricsRecord* metricRecord) { - // category - mCategory = metricRecord->GetCategory(); - // labels - for (auto item = metricRecord->GetLabels()->begin(); item != metricRecord->GetLabels()->end(); ++item) { - pair pair = *item; - mLabels[pair.first] = pair.second; - } - for (auto item = metricRecord->GetDynamicLabels()->begin(); item != metricRecord->GetDynamicLabels()->end(); - ++item) { - pair> pair = *item; - string value = pair.second(); - mLabels[pair.first] = value; - } - // counters - for (auto& item : metricRecord->GetCounters()) { - mCounters[item->GetName()] = item->GetValue(); - } - for (auto& item : metricRecord->GetTimeCounters()) { - mCounters[item->GetName()] = item->GetValue(); - } - // gauges - for (auto& item : metricRecord->GetIntGauges()) { - mGauges[item->GetName()] = item->GetValue(); - } - for (auto& item : metricRecord->GetDoubleGauges()) { - mGauges[item->GetName()] = item->GetValue(); - } - CreateKey(); -} - -SelfMonitorMetricEvent::SelfMonitorMetricEvent(const std::map& metricRecord) { - Json::Value labels, counters, gauges; - string errMsg; - ParseJsonTable(metricRecord.at(METRIC_GO_KEY_LABELS), labels, errMsg); - ParseJsonTable(metricRecord.at(METRIC_GO_KEY_COUNTERS), counters, errMsg); - ParseJsonTable(metricRecord.at(METRIC_GO_KEY_GAUGES), gauges, errMsg); - // category - if (labels.isMember("metric_category")) { - mCategory = labels["metric_category"].asString(); - labels.removeMember("metric_category"); - } else { - mCategory = MetricCategory::METRIC_CATEGORY_UNKNOWN; - LOG_ERROR(sLogger, ("parse go metric", "labels")("err", "metric_category not found")); - } - // labels - for (Json::Value::const_iterator itr = labels.begin(); itr != labels.end(); ++itr) { - if (itr->isString()) { - mLabels[itr.key().asString()] = itr->asString(); - } - } - // counters - for (Json::Value::const_iterator itr = counters.begin(); itr != counters.end(); ++itr) { - if (itr->isUInt64()) { - mCounters[itr.key().asString()] = itr->asUInt64(); - } - if (itr->isDouble()) { - mCounters[itr.key().asString()] = static_cast(itr->asDouble()); - } - if (itr->isString()) { - try { - mCounters[itr.key().asString()] = static_cast(std::stod(itr->asString())); - } catch (...) { - mCounters[itr.key().asString()] = 0; - } - } - } - // gauges - for (Json::Value::const_iterator itr = gauges.begin(); itr != gauges.end(); ++itr) { - if (itr->isDouble()) { - mGauges[itr.key().asString()] = itr->asDouble(); - } - if (itr->isString()) { - try { - double value = std::stod(itr->asString()); - mGauges[itr.key().asString()] = value; - } catch (...) { - mGauges[itr.key().asString()] = 0; - } - } - } - CreateKey(); -} - -void SelfMonitorMetricEvent::CreateKey() { - string key = "category:" + mCategory; - for (auto label : mLabels) { - key += (";" + label.first + ":" + label.second); - } - mKey = HashString(key); - mUpdatedFlag = true; -} - -void SelfMonitorMetricEvent::SetInterval(size_t interval) { - mLastSendInterval = 0; - mSendInterval = interval; -} - -void SelfMonitorMetricEvent::Merge(SelfMonitorMetricEvent& event) { - if (mSendInterval != event.mSendInterval) { - mSendInterval = event.mSendInterval; - mLastSendInterval = 0; - } - for (auto counter = event.mCounters.begin(); counter != event.mCounters.end(); counter++) { - if (mCounters.find(counter->first) != mCounters.end()) - mCounters[counter->first] += counter->second; - else - mCounters[counter->first] = counter->second; - } - for (auto gauge = event.mGauges.begin(); gauge != event.mGauges.end(); gauge++) { - mGauges[gauge->first] = gauge->second; - } - mUpdatedFlag = true; -} - -bool SelfMonitorMetricEvent::ShouldSend() { - mLastSendInterval++; - return (mLastSendInterval >= mSendInterval) && mUpdatedFlag; -} - -bool SelfMonitorMetricEvent::ShouldDelete() { - return (mLastSendInterval >= mSendInterval) && !mUpdatedFlag; -} - -void SelfMonitorMetricEvent::ReadAsMetricEvent(MetricEvent* metricEventPtr) { - // time - metricEventPtr->SetTimestamp(GetCurrentLogtailTime().tv_sec); - // __tag__ - for (auto label = mLabels.begin(); label != mLabels.end(); label++) { - metricEventPtr->SetTag(label->first, label->second); - } - // name - metricEventPtr->SetName(mCategory); - // values - metricEventPtr->SetValue({}); - for (auto counter = mCounters.begin(); counter != mCounters.end(); counter++) { - metricEventPtr->MutableValue()->SetValue(counter->first, counter->second); - counter->second = 0; - } - for (auto gauge = mGauges.begin(); gauge != mGauges.end(); gauge++) { - metricEventPtr->MutableValue()->SetValue(gauge->first, gauge->second); - } - // set flags - mLastSendInterval = 0; - mUpdatedFlag = false; -} WriteMetrics::~WriteMetrics() { Clear(); @@ -391,7 +234,6 @@ void ReadMetrics::UpdateGoCppProvidedMetrics(vector>& metric if (metric.first == METRIC_AGENT_GO_ROUTINES_TOTAL) { LoongCollectorMonitor::GetInstance()->SetAgentGoRoutinesTotal(stoi(metric.second)); } - LogtailMonitor::GetInstance()->UpdateMetric(metric.first, metric.second); } } } diff --git a/core/monitor/MetricManager.h b/core/monitor/MetricManager.h index e87a022da1..3f28b9b477 100644 --- a/core/monitor/MetricManager.h +++ b/core/monitor/MetricManager.h @@ -22,57 +22,16 @@ #include #include -#include "MetricRecord.h" #include "common/Lock.h" #include "models/PipelineEventGroup.h" -#include "protobuf/sls/sls_logs.pb.h" +#include "monitor/metric_constants/MetricConstants.h" +#include "monitor/metric_models/MetricRecord.h" +#include "monitor/metric_models/SelfMonitorMetricEvent.h" namespace logtail { extern const std::string METRIC_TOPIC_TYPE; -struct SelfMonitorMetricRule { - bool mEnable; - size_t mInterval; -}; - -struct SelfMonitorMetricRules { - SelfMonitorMetricRule mAgentMetricsRule; - SelfMonitorMetricRule mRunnerMetricsRule; - SelfMonitorMetricRule mPipelineMetricsRule; - SelfMonitorMetricRule mPluginSourceMetricsRule; - SelfMonitorMetricRule mPluginMetricsRule; - SelfMonitorMetricRule mComponentMetricsRule; -}; - -using SelfMonitorMetricEventKey = int64_t; -class SelfMonitorMetricEvent { -public: - SelfMonitorMetricEvent(); - SelfMonitorMetricEvent(MetricsRecord* metricRecord); - SelfMonitorMetricEvent(const std::map& metricRecord); - - void SetInterval(size_t interval); - void Merge(SelfMonitorMetricEvent& event); - - bool ShouldSend(); - bool ShouldDelete(); - void ReadAsMetricEvent(MetricEvent* metricEventPtr); - - SelfMonitorMetricEventKey mKey; // labels + category - std::string mCategory; // category -private: - void CreateKey(); - - std::unordered_map mLabels; - std::unordered_map mCounters; - std::unordered_map mGauges; - int32_t mSendInterval; - int32_t mLastSendInterval; - bool mUpdatedFlag; -}; -using SelfMonitorMetricEventMap = std::unordered_map; - class WriteMetrics { private: WriteMetrics() = default; diff --git a/core/monitor/MetricStore.cpp b/core/monitor/MetricStore.cpp deleted file mode 100644 index 8594329259..0000000000 --- a/core/monitor/MetricStore.cpp +++ /dev/null @@ -1,81 +0,0 @@ -// Copyright 2022 iLogtail Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "MetricStore.h" -#include - -namespace logtail { - -std::string MetricStore::MetricToString() { - Json::Value rootValue; - ScopedSpinLock lock(mMonitorMetricLock); - auto iter = mLogtailMetric.begin(); - for (; iter != mLogtailMetric.end(); ++iter) { - rootValue[iter->first] = Json::Value(iter->second); - } - for (iter = mConstLogtailMetric.begin(); iter != mConstLogtailMetric.end(); ++iter) { - rootValue[iter->first] = Json::Value(iter->second); - } - return rootValue.toStyledString(); -} - -std::string MetricStore::CheckLogtailStatus() { - std::string metricStr; - int32_t processFull = 0; - int32_t sendFull = 0; - int32_t senderInvalid = 0; - int32_t openFdCount = 0; - double processTps = 0.; - - ScopedSpinLock lock(mMonitorMetricLock); - auto iter = mLogtailMetric.find("process_queue_full"); - if (iter != mLogtailMetric.end()) { - processFull = StringTo(iter->second); - } - - iter = mLogtailMetric.find("send_queue_full"); - if (iter != mLogtailMetric.end()) { - sendFull = StringTo(iter->second); - } - - iter = mLogtailMetric.find("sender_invalid"); - if (iter != mLogtailMetric.end()) { - senderInvalid = StringTo(iter->second); - } - - iter = mLogtailMetric.find("open_fd"); - if (iter != mLogtailMetric.end()) { - openFdCount = StringTo(iter->second); - } - - iter = mLogtailMetric.find("process_tps"); - if (iter != mLogtailMetric.end()) { - processTps = StringTo(iter->second); - } - - metricStr = "ok"; - if (processTps > 20.) - metricStr = "busy"; - if (openFdCount > 1000) - metricStr = "many_log_files"; - if (processFull > 0) - metricStr = "process_block"; - if (sendFull > 0) - metricStr = "send_block"; - if (senderInvalid > 0) - metricStr = "send_error"; - return metricStr; -} - -} // namespace logtail \ No newline at end of file diff --git a/core/monitor/MetricStore.h b/core/monitor/MetricStore.h deleted file mode 100644 index 5d3868d3ff..0000000000 --- a/core/monitor/MetricStore.h +++ /dev/null @@ -1,58 +0,0 @@ -/* - * Copyright 2022 iLogtail Authors - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once -#include -#include "common/StringTools.h" -#include "common/Lock.h" - -namespace logtail { - -class MetricStore { -public: - template - void UpdateMetric(const std::string key, const T& val) { - ScopedSpinLock lock(mMonitorMetricLock); - mLogtailMetric[key] = ToString(val); - } - - template - void UpdateConstMetric(const std::string key, const T& val) { - ScopedSpinLock lock(mMonitorMetricLock); - mConstLogtailMetric[key] = ToString(val); - } - -protected: - // Clear mutable metrics. - void ClearMetric() { - ScopedSpinLock lock(mMonitorMetricLock); - mLogtailMetric.clear(); - } - - // MetricToString dumps stored metrics to string in JSON format. - std::string MetricToString(); - - // CheckLogtailStatus checks metrics status and return a string reprensent status. - std::string CheckLogtailStatus(); - - // Metrics and corresponding lock. - SpinLock mMonitorMetricLock; - // mConstLogtailMetric is used to store immutable metrics (assumption), such as IP. - std::unordered_map mConstLogtailMetric; - std::unordered_map mLogtailMetric; -}; - -} // namespace logtail \ No newline at end of file diff --git a/core/monitor/Monitor.cpp b/core/monitor/Monitor.cpp index 5d9f40e74e..34fabe9059 100644 --- a/core/monitor/Monitor.cpp +++ b/core/monitor/Monitor.cpp @@ -229,21 +229,9 @@ void LogtailMonitor::Monitor() { SendStatusProfile(true); } -template -static void AddLogContent(sls_logs::Log* log, const char* key, const T& val) { - auto content = log->add_contents(); - content->set_key(key); - content->set_value(ToString(val)); -} - bool LogtailMonitor::SendStatusProfile(bool suicide) { mStatusCount++; - string category; - if (suicide) - category = "logtail_suicide_profile"; - else if (mStatusCount % 2 == 0) - category = "logtail_status_profile"; - else + if (!suicide && mStatusCount % 2 != 0) return false; auto now = GetCurrentLogtailTime(); @@ -258,100 +246,12 @@ bool LogtailMonitor::SendStatusProfile(bool suicide) { sleep(10); _exit(1); } - - // the unique id of current instance - std::string id = sdk::Base64Enconde(LoongCollectorMonitor::mHostname + LoongCollectorMonitor::mIpAddr - + ILOGTAIL_VERSION + GetProcessExecutionDir()); - - // Collect status information to send. - LogGroup logGroup; - logGroup.set_category(category); - logGroup.set_source(LoongCollectorMonitor::mIpAddr); - Log* logPtr = logGroup.add_logs(); - SetLogTime(logPtr, AppConfig::GetInstance()->EnableLogTimeAutoAdjust() ? now.tv_sec + GetTimeDelta() : now.tv_sec); // CPU usage of Logtail process. - AddLogContent(logPtr, "cpu", mCpuStat.mCpuUsage); LoongCollectorMonitor::GetInstance()->SetAgentCpu(mCpuStat.mCpuUsage); -#if defined(__linux__) // TODO: Remove this if auto scale is available on Windows. - // CPU usage of system. - AddLogContent(logPtr, "os_cpu", mOsCpuStatForScale.mOsCpuUsage); -#endif // Memory usage of Logtail process. - AddLogContent(logPtr, "mem", mMemStat.mRss); LoongCollectorMonitor::GetInstance()->SetAgentMemory(mMemStat.mRss); - // The version, uuid of Logtail. - AddLogContent(logPtr, "version", ILOGTAIL_VERSION); - AddLogContent(logPtr, "uuid", Application::GetInstance()->GetUUID()); -#ifdef __ENTERPRISE__ - AddLogContent(logPtr, "user_defined_id", EnterpriseConfigProvider::GetInstance()->GetUserDefinedIdSet()); - AddLogContent(logPtr, "aliuids", EnterpriseConfigProvider::GetInstance()->GetAliuidSet()); -#endif - AddLogContent(logPtr, "projects", FlusherSLS::GetAllProjects()); - AddLogContent(logPtr, "instance_id", Application::GetInstance()->GetInstanceId()); - AddLogContent(logPtr, "instance_key", id); - // Host informations. - AddLogContent(logPtr, "ip", LoongCollectorMonitor::mIpAddr); - AddLogContent(logPtr, "hostname", LoongCollectorMonitor::mHostname); - AddLogContent(logPtr, "os", OS_NAME); - AddLogContent(logPtr, "os_detail", LoongCollectorMonitor::mOsDetail); - AddLogContent(logPtr, "user", LoongCollectorMonitor::mUsername); -#if defined(__linux__) - AddLogContent(logPtr, "load", GetLoadAvg()); -#endif - AddLogContent(logPtr, "plugin_stats", PipelineManager::GetInstance()->GetPluginStatistics()); - // Metrics. - vector allProfileRegion; - GetProfileSender()->GetAllProfileRegion(allProfileRegion); - UpdateMetric("region", allProfileRegion); -#ifdef __ENTERPRISE__ - UpdateMetric("config_update_count", EnterpriseConfigProvider::GetInstance()->GetConfigUpdateTotalCount()); - UpdateMetric("config_update_item_count", EnterpriseConfigProvider::GetInstance()->GetConfigUpdateItemTotalCount()); - UpdateMetric("config_update_last_time", - GetTimeStamp(EnterpriseConfigProvider::GetInstance()->GetLastConfigUpdateTime(), "%Y-%m-%d %H:%M:%S")); - UpdateMetric("config_get_last_time", - GetTimeStamp(EnterpriseConfigProvider::GetInstance()->GetLastConfigGetTime(), "%Y-%m-%d %H:%M:%S")); -#endif - UpdateMetric("config_prefer_real_ip", BOOL_FLAG(send_prefer_real_ip)); - UpdateMetric("plugin_enabled", LogtailPlugin::GetInstance()->IsPluginOpened()); - const std::vector& envTags = AppConfig::GetInstance()->GetEnvTags(); - if (!envTags.empty()) { - UpdateMetric("env_config_count", envTags.size()); - } - int32_t usedSendingConcurrency = FlusherRunner::GetInstance()->GetSendingBufferCount(); - UpdateMetric("used_sending_concurrency", usedSendingConcurrency); - - AddLogContent(logPtr, "metric_json", MetricToString()); - AddLogContent(logPtr, "status", CheckLogtailStatus()); - AddLogContent(logPtr, "ecs_instance_id", LoongCollectorMonitor::mECSInstanceID); - AddLogContent(logPtr, "ecs_user_id", LoongCollectorMonitor::mECSUserID); - AddLogContent(logPtr, "ecs_regioon_id", LoongCollectorMonitor::mECSRegionID); - ClearMetric(); - - if (!mIsThreadRunning) - return false; - - // Dump to local and send to enabled regions. - DumpToLocal(logGroup); - for (size_t i = 0; i < allProfileRegion.size(); ++i) { - if (BOOL_FLAG(check_profile_region) && !FlusherSLS::IsRegionContainingConfig(allProfileRegion[i])) { - LOG_DEBUG(sLogger, ("region does not contain config for this instance", allProfileRegion[i])); - continue; - } - // Check if the region is disabled. - if (!FlusherSLS::GetRegionStatus(allProfileRegion[i])) { - LOG_DEBUG(sLogger, ("disabled region, do not send status profile to region", allProfileRegion[i])); - continue; - } - - if (i == allProfileRegion.size() - 1) { - GetProfileSender()->SendToProfileProject(allProfileRegion[i], logGroup); - } else { - LogGroup copyLogGroup = logGroup; - GetProfileSender()->SendToProfileProject(allProfileRegion[i], copyLogGroup); - } - } - return true; + return mIsThreadRunning; } bool LogtailMonitor::GetMemStat() { @@ -473,24 +373,6 @@ bool LogtailMonitor::CheckHardMemLimit() { return mMemStat.mRss > 5 * AppConfig::GetInstance()->GetMemUsageUpLimit(); } -void LogtailMonitor::DumpToLocal(const sls_logs::LogGroup& logGroup) { - string dumpStr = "\n####logtail status####\n"; - for (int32_t logIdx = 0; logIdx < logGroup.logs_size(); ++logIdx) { - Json::Value category; - const Log& log = logGroup.logs(logIdx); - for (int32_t conIdx = 0; conIdx < log.contents_size(); ++conIdx) { - const Log_Content& content = log.contents(conIdx); - const string& key = content.key(); - const string& value = content.value(); - dumpStr.append(key).append(":").append(value).append("\n"); - } - } - dumpStr += "####status end####\n"; - - static auto gMonitorLogger = Logger::Instance().GetLogger(GetAgentLoggersPrefix() + "/status"); - LOG_INFO(gMonitorLogger, ("\n", dumpStr)); -} - bool LogtailMonitor::DumpMonitorInfo(time_t monitorTime) { string path = GetAgentLogDir() + GetMonitorInfoFileName(); ofstream outfile(path.c_str(), ofstream::app); @@ -756,7 +638,6 @@ void LoongCollectorMonitor::Init() { void LoongCollectorMonitor::Stop() { SelfMonitorServer::GetInstance()->Stop(); LOG_INFO(sLogger, ("LoongCollector monitor", "stopped successfully")); - } } // namespace logtail diff --git a/core/monitor/Monitor.h b/core/monitor/Monitor.h index c66a47218e..355ff2e2c0 100644 --- a/core/monitor/Monitor.h +++ b/core/monitor/Monitor.h @@ -21,9 +21,7 @@ #include #include -#include "MetricConstants.h" #include "MetricManager.h" -#include "MetricStore.h" #if defined(_MSC_VER) #include @@ -78,7 +76,7 @@ struct OsCpuStat { } }; -class LogtailMonitor : public MetricStore { +class LogtailMonitor { public: LogtailMonitor(const LogtailMonitor&) = delete; LogtailMonitor& operator=(const LogtailMonitor&) = delete; @@ -124,9 +122,6 @@ class LogtailMonitor : public MetricStore { // several seconds after calling this method and before _exit(1). bool SendStatusProfile(bool suicide); - // DumpToLocal dumps the @logGroup to local status log. - void DumpToLocal(const sls_logs::LogGroup& logGroup); - // DumpMonitorInfo dumps simple monitor information to local. bool DumpMonitorInfo(time_t monitorTime); diff --git a/core/monitor/MetricRecord.cpp b/core/monitor/metric_models/MetricRecord.cpp similarity index 100% rename from core/monitor/MetricRecord.cpp rename to core/monitor/metric_models/MetricRecord.cpp diff --git a/core/monitor/MetricRecord.h b/core/monitor/metric_models/MetricRecord.h similarity index 100% rename from core/monitor/MetricRecord.h rename to core/monitor/metric_models/MetricRecord.h diff --git a/core/monitor/MetricTypes.h b/core/monitor/metric_models/MetricTypes.h similarity index 100% rename from core/monitor/MetricTypes.h rename to core/monitor/metric_models/MetricTypes.h diff --git a/core/monitor/PluginMetricManager.cpp b/core/monitor/metric_models/ReentrantMetricsRecord.cpp similarity index 99% rename from core/monitor/PluginMetricManager.cpp rename to core/monitor/metric_models/ReentrantMetricsRecord.cpp index 58719241d9..33b9a7a5f4 100644 --- a/core/monitor/PluginMetricManager.cpp +++ b/core/monitor/metric_models/ReentrantMetricsRecord.cpp @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "PluginMetricManager.h" +#include "ReentrantMetricsRecord.h" namespace logtail { diff --git a/core/monitor/PluginMetricManager.h b/core/monitor/metric_models/ReentrantMetricsRecord.h similarity index 100% rename from core/monitor/PluginMetricManager.h rename to core/monitor/metric_models/ReentrantMetricsRecord.h diff --git a/core/monitor/metric_models/SelfMonitorMetricEvent.cpp b/core/monitor/metric_models/SelfMonitorMetricEvent.cpp new file mode 100644 index 0000000000..e35698e359 --- /dev/null +++ b/core/monitor/metric_models/SelfMonitorMetricEvent.cpp @@ -0,0 +1,187 @@ +/* + * Copyright 2023 iLogtail Authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "SelfMonitorMetricEvent.h" + +#include "common/HashUtil.h" +#include "common/JsonUtil.h" +#include "common/TimeUtil.h" +#include "logger/Logger.h" + +using namespace std; + +namespace logtail { + +const string METRIC_GO_KEY_LABELS = "labels"; +const string METRIC_GO_KEY_COUNTERS = "counters"; +const string METRIC_GO_KEY_GAUGES = "gauges"; + +SelfMonitorMetricEvent::SelfMonitorMetricEvent() { +} + +SelfMonitorMetricEvent::SelfMonitorMetricEvent(MetricsRecord* metricRecord) { + // category + mCategory = metricRecord->GetCategory(); + // labels + for (auto item = metricRecord->GetLabels()->begin(); item != metricRecord->GetLabels()->end(); ++item) { + pair pair = *item; + mLabels[pair.first] = pair.second; + } + for (auto item = metricRecord->GetDynamicLabels()->begin(); item != metricRecord->GetDynamicLabels()->end(); + ++item) { + pair> pair = *item; + string value = pair.second(); + mLabels[pair.first] = value; + } + // counters + for (auto& item : metricRecord->GetCounters()) { + mCounters[item->GetName()] = item->GetValue(); + } + for (auto& item : metricRecord->GetTimeCounters()) { + mCounters[item->GetName()] = item->GetValue(); + } + // gauges + for (auto& item : metricRecord->GetIntGauges()) { + mGauges[item->GetName()] = item->GetValue(); + } + for (auto& item : metricRecord->GetDoubleGauges()) { + mGauges[item->GetName()] = item->GetValue(); + } + CreateKey(); +} + +SelfMonitorMetricEvent::SelfMonitorMetricEvent(const std::map& metricRecord) { + Json::Value labels, counters, gauges; + string errMsg; + ParseJsonTable(metricRecord.at(METRIC_GO_KEY_LABELS), labels, errMsg); + if (!errMsg.empty()) { + mCategory = MetricCategory::METRIC_CATEGORY_UNKNOWN; + LOG_ERROR(sLogger, ("parse go metric", "labels")("err", errMsg)); + return; + } + ParseJsonTable(metricRecord.at(METRIC_GO_KEY_COUNTERS), counters, errMsg); + if (!errMsg.empty()) { + LOG_ERROR(sLogger, ("parse go metric", "counters")("err", errMsg)); + } + ParseJsonTable(metricRecord.at(METRIC_GO_KEY_GAUGES), gauges, errMsg); + if (!errMsg.empty()) { + LOG_ERROR(sLogger, ("parse go metric", "gauges")("err", errMsg)); + } + // category + if (labels.isMember("metric_category")) { + mCategory = labels["metric_category"].asString(); + labels.removeMember("metric_category"); + } else { + mCategory = MetricCategory::METRIC_CATEGORY_UNKNOWN; + LOG_ERROR(sLogger, ("parse go metric", "labels")("err", "metric_category not found")); + } + // labels + for (Json::Value::const_iterator itr = labels.begin(); itr != labels.end(); ++itr) { + if (itr->isString()) { + mLabels[itr.key().asString()] = itr->asString(); + } + } + // counters + for (Json::Value::const_iterator itr = counters.begin(); itr != counters.end(); ++itr) { + if (itr->isString()) { + try { + mCounters[itr.key().asString()] = static_cast(std::stod(itr->asString())); + } catch (...) { // catch std::invalid_argument & std::out_of_range + mCounters[itr.key().asString()] = 0; + } + } + } + // gauges + for (Json::Value::const_iterator itr = gauges.begin(); itr != gauges.end(); ++itr) { + if (itr->isDouble()) { + mGauges[itr.key().asString()] = itr->asDouble(); + } + if (itr->isString()) { + try { + double value = std::stod(itr->asString()); + mGauges[itr.key().asString()] = value; + } catch (...) { + mGauges[itr.key().asString()] = 0; + } + } + } + CreateKey(); +} + +void SelfMonitorMetricEvent::CreateKey() { + string key = "category:" + mCategory; + for (auto label : mLabels) { + key += (";" + label.first + ":" + label.second); + } + mKey = HashString(key); + mUpdatedFlag = true; +} + +void SelfMonitorMetricEvent::SetInterval(size_t interval) { + mLastSendInterval = 0; + mSendInterval = interval; +} + +void SelfMonitorMetricEvent::Merge(SelfMonitorMetricEvent& event) { + if (mSendInterval != event.mSendInterval) { + mSendInterval = event.mSendInterval; + mLastSendInterval = 0; + } + for (auto counter = event.mCounters.begin(); counter != event.mCounters.end(); counter++) { + if (mCounters.find(counter->first) != mCounters.end()) + mCounters[counter->first] += counter->second; + else + mCounters[counter->first] = counter->second; + } + for (auto gauge = event.mGauges.begin(); gauge != event.mGauges.end(); gauge++) { + mGauges[gauge->first] = gauge->second; + } + mUpdatedFlag = true; +} + +bool SelfMonitorMetricEvent::ShouldSend() { + mLastSendInterval++; + return (mLastSendInterval >= mSendInterval) && mUpdatedFlag; +} + +bool SelfMonitorMetricEvent::ShouldDelete() { + return (mLastSendInterval >= mSendInterval) && !mUpdatedFlag; +} + +void SelfMonitorMetricEvent::ReadAsMetricEvent(MetricEvent* metricEventPtr) { + // time + metricEventPtr->SetTimestamp(GetCurrentLogtailTime().tv_sec); + // __tag__ + for (auto label = mLabels.begin(); label != mLabels.end(); label++) { + metricEventPtr->SetTag(label->first, label->second); + } + // name + metricEventPtr->SetName(mCategory); + // values + metricEventPtr->SetValue(UntypedMultiDoubleValues{{}, nullptr}); + for (auto counter = mCounters.begin(); counter != mCounters.end(); counter++) { + metricEventPtr->MutableValue()->SetValue(counter->first, counter->second); + counter->second = 0; + } + for (auto gauge = mGauges.begin(); gauge != mGauges.end(); gauge++) { + metricEventPtr->MutableValue()->SetValue(gauge->first, gauge->second); + } + // set flags + mLastSendInterval = 0; + mUpdatedFlag = false; +} + +} // namespace logtail \ No newline at end of file diff --git a/core/monitor/metric_models/SelfMonitorMetricEvent.h b/core/monitor/metric_models/SelfMonitorMetricEvent.h new file mode 100644 index 0000000000..ff0a8cc7ea --- /dev/null +++ b/core/monitor/metric_models/SelfMonitorMetricEvent.h @@ -0,0 +1,69 @@ +/* + * Copyright 2023 iLogtail Authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once +#include "MetricRecord.h" +#include "models/PipelineEventGroup.h" + +namespace logtail { + +struct SelfMonitorMetricRule { + bool mEnable; + size_t mInterval; +}; + +struct SelfMonitorMetricRules { + SelfMonitorMetricRule mAgentMetricsRule; + SelfMonitorMetricRule mRunnerMetricsRule; + SelfMonitorMetricRule mPipelineMetricsRule; + SelfMonitorMetricRule mPluginSourceMetricsRule; + SelfMonitorMetricRule mPluginMetricsRule; + SelfMonitorMetricRule mComponentMetricsRule; +}; + +using SelfMonitorMetricEventKey = int64_t; +class SelfMonitorMetricEvent { +public: + SelfMonitorMetricEvent(); + SelfMonitorMetricEvent(MetricsRecord* metricRecord); + SelfMonitorMetricEvent(const std::map& metricRecord); + + void SetInterval(size_t interval); + void Merge(SelfMonitorMetricEvent& event); + + bool ShouldSend(); + bool ShouldDelete(); + void ReadAsMetricEvent(MetricEvent* metricEventPtr); + + SelfMonitorMetricEventKey mKey; // labels + category + std::string mCategory; // category +private: + void CreateKey(); + + std::unordered_map mLabels; + std::unordered_map mCounters; + std::unordered_map mGauges; + int32_t mSendInterval; + int32_t mLastSendInterval; + bool mUpdatedFlag; + +#ifdef APSARA_UNIT_TEST_MAIN + friend class SelfMonitorMetricEventUnittest; +#endif +}; +using SelfMonitorMetricEventMap = std::unordered_map; + +} // namespace logtail \ No newline at end of file diff --git a/core/pipeline/plugin/instance/FlusherInstance.h b/core/pipeline/plugin/instance/FlusherInstance.h index 68089f60b1..4102616541 100644 --- a/core/pipeline/plugin/instance/FlusherInstance.h +++ b/core/pipeline/plugin/instance/FlusherInstance.h @@ -21,7 +21,7 @@ #include #include "models/PipelineEventGroup.h" -#include "monitor/PluginMetricManager.h" +#include "monitor/metric_models/ReentrantMetricsRecord.h" #include "pipeline/PipelineContext.h" #include "pipeline/plugin/instance/PluginInstance.h" #include "pipeline/plugin/interface/Flusher.h" diff --git a/core/plugin/input/InputContainerStdio.h b/core/plugin/input/InputContainerStdio.h index a9d1e51aed..f5955b4fa6 100644 --- a/core/plugin/input/InputContainerStdio.h +++ b/core/plugin/input/InputContainerStdio.h @@ -21,7 +21,7 @@ #include "container_manager/ContainerDiscoveryOptions.h" #include "file_server/FileDiscoveryOptions.h" #include "file_server/MultilineOptions.h" -#include "monitor/PluginMetricManager.h" +#include "monitor/metric_models/ReentrantMetricsRecord.h" #include "pipeline/plugin/interface/Input.h" #include "file_server/reader/FileReaderOptions.h" diff --git a/core/plugin/input/InputFile.h b/core/plugin/input/InputFile.h index ee8275ef7c..4d966f1e67 100644 --- a/core/plugin/input/InputFile.h +++ b/core/plugin/input/InputFile.h @@ -21,7 +21,7 @@ #include "container_manager/ContainerDiscoveryOptions.h" #include "file_server/FileDiscoveryOptions.h" #include "file_server/MultilineOptions.h" -#include "monitor/PluginMetricManager.h" +#include "monitor/metric_models/ReentrantMetricsRecord.h" #include "pipeline/plugin/interface/Input.h" #include "file_server/reader/FileReaderOptions.h" diff --git a/core/plugin/input/InputFileSecurity.h b/core/plugin/input/InputFileSecurity.h index fea0b459fc..ebff74842f 100644 --- a/core/plugin/input/InputFileSecurity.h +++ b/core/plugin/input/InputFileSecurity.h @@ -21,7 +21,7 @@ #include "ebpf/config.h" #include "pipeline/plugin/interface/Input.h" #include "ebpf/eBPFServer.h" -#include "monitor/PluginMetricManager.h" +#include "monitor/metric_models/ReentrantMetricsRecord.h" namespace logtail { diff --git a/core/plugin/input/InputNetworkObserver.cpp b/core/plugin/input/InputNetworkObserver.cpp index ce9c4218ff..564d2244f0 100644 --- a/core/plugin/input/InputNetworkObserver.cpp +++ b/core/plugin/input/InputNetworkObserver.cpp @@ -18,7 +18,7 @@ #include "ebpf/eBPFServer.h" #include "ebpf/config.h" #include "logger/Logger.h" -#include "monitor/PluginMetricManager.h" +#include "monitor/metric_models/ReentrantMetricsRecord.h" using namespace std; diff --git a/core/plugin/input/InputNetworkObserver.h b/core/plugin/input/InputNetworkObserver.h index 7f204a2c90..33bf4ab0cc 100644 --- a/core/plugin/input/InputNetworkObserver.h +++ b/core/plugin/input/InputNetworkObserver.h @@ -21,7 +21,7 @@ #include "ebpf/config.h" #include "pipeline/plugin/interface/Input.h" #include "ebpf/include/export.h" -#include "monitor/PluginMetricManager.h" +#include "monitor/metric_models/ReentrantMetricsRecord.h" namespace logtail { diff --git a/core/plugin/input/InputNetworkSecurity.h b/core/plugin/input/InputNetworkSecurity.h index cda3a7c170..18023fc28e 100644 --- a/core/plugin/input/InputNetworkSecurity.h +++ b/core/plugin/input/InputNetworkSecurity.h @@ -20,7 +20,7 @@ #include "ebpf/config.h" #include "pipeline/plugin/interface/Input.h" -#include "monitor/PluginMetricManager.h" +#include "monitor/metric_models/ReentrantMetricsRecord.h" namespace logtail { diff --git a/core/plugin/input/InputProcessSecurity.h b/core/plugin/input/InputProcessSecurity.h index d26d7a95e3..d731b2b8fb 100644 --- a/core/plugin/input/InputProcessSecurity.h +++ b/core/plugin/input/InputProcessSecurity.h @@ -20,7 +20,7 @@ #include "ebpf/config.h" #include "pipeline/plugin/interface/Input.h" -#include "monitor/PluginMetricManager.h" +#include "monitor/metric_models/ReentrantMetricsRecord.h" namespace logtail { diff --git a/core/prometheus/PromSelfMonitor.cpp b/core/prometheus/PromSelfMonitor.cpp index 17f2e701e1..410195acaf 100644 --- a/core/prometheus/PromSelfMonitor.cpp +++ b/core/prometheus/PromSelfMonitor.cpp @@ -4,7 +4,7 @@ #include #include -#include "monitor/MetricTypes.h" +#include "monitor/metric_models/MetricTypes.h" #include "monitor/metric_constants/MetricConstants.h" using namespace std; diff --git a/core/prometheus/PromSelfMonitor.h b/core/prometheus/PromSelfMonitor.h index 002e8f88ef..b762533791 100644 --- a/core/prometheus/PromSelfMonitor.h +++ b/core/prometheus/PromSelfMonitor.h @@ -5,7 +5,7 @@ #include #include "monitor/MetricManager.h" -#include "monitor/PluginMetricManager.h" +#include "monitor/metric_models/ReentrantMetricsRecord.h" namespace logtail { diff --git a/core/prometheus/PrometheusInputRunner.h b/core/prometheus/PrometheusInputRunner.h index 2823d562a4..996caf163e 100644 --- a/core/prometheus/PrometheusInputRunner.h +++ b/core/prometheus/PrometheusInputRunner.h @@ -22,7 +22,7 @@ #include "common/Lock.h" #include "common/timer/Timer.h" -#include "monitor/MetricTypes.h" +#include "monitor/metric_models/MetricTypes.h" #include "prometheus/schedulers/TargetSubscriberScheduler.h" #include "runner/InputRunner.h" #include "sdk/Common.h" diff --git a/core/prometheus/schedulers/ScrapeScheduler.h b/core/prometheus/schedulers/ScrapeScheduler.h index 00ac2d989a..78018325bd 100644 --- a/core/prometheus/schedulers/ScrapeScheduler.h +++ b/core/prometheus/schedulers/ScrapeScheduler.h @@ -22,7 +22,7 @@ #include "BaseScheduler.h" #include "common/http/HttpResponse.h" #include "models/PipelineEventGroup.h" -#include "monitor/MetricTypes.h" +#include "monitor/metric_models/MetricTypes.h" #include "pipeline/queue/QueueKey.h" #include "prometheus/PromSelfMonitor.h" #include "prometheus/Utils.h" diff --git a/core/unittest/monitor/CMakeLists.txt b/core/unittest/monitor/CMakeLists.txt index aa6544d052..d3e7c760f2 100644 --- a/core/unittest/monitor/CMakeLists.txt +++ b/core/unittest/monitor/CMakeLists.txt @@ -21,6 +21,10 @@ target_link_libraries(metric_manager_unittest ${UT_BASE_TARGET}) add_executable(plugin_metric_manager_unittest PluginMetricManagerUnittest.cpp) target_link_libraries(plugin_metric_manager_unittest ${UT_BASE_TARGET}) +add_executable(self_monitor_metric_event_unittest SelfMonitorMetricEventUnittest.cpp) +target_link_libraries(self_monitor_metric_event_unittest ${UT_BASE_TARGET}) + include(GoogleTest) gtest_discover_tests(metric_manager_unittest) gtest_discover_tests(plugin_metric_manager_unittest) +gtest_discover_tests(self_monitor_metric_event_unittest) diff --git a/core/unittest/monitor/PluginMetricManagerUnittest.cpp b/core/unittest/monitor/PluginMetricManagerUnittest.cpp index 41ef503c75..77246a322b 100644 --- a/core/unittest/monitor/PluginMetricManagerUnittest.cpp +++ b/core/unittest/monitor/PluginMetricManagerUnittest.cpp @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "monitor/PluginMetricManager.h" +#include "monitor/metric_models/ReentrantMetricsRecord.h" #include "monitor/metric_constants/MetricConstants.h" #include "unittest/Unittest.h" diff --git a/core/unittest/monitor/SelfMonitorMetricEventUnittest.cpp b/core/unittest/monitor/SelfMonitorMetricEventUnittest.cpp new file mode 100644 index 0000000000..1b22386701 --- /dev/null +++ b/core/unittest/monitor/SelfMonitorMetricEventUnittest.cpp @@ -0,0 +1,207 @@ +// Copyright 2024 iLogtail Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "monitor/MetricManager.h" +#include "monitor/metric_models/SelfMonitorMetricEvent.h" +#include "unittest/Unittest.h" + +namespace logtail { + +class SelfMonitorMetricEventUnittest : public ::testing::Test { +public: + void SetUp() {} + void TearDown() {} + + void TestCreateFromMetricEvent(); + void TestCreateFromGoMetricMap(); + void TestMerge(); + void TestSendInterval(); + +private: + std::shared_ptr mSourceBuffer; + std::unique_ptr mEventGroup; + std::unique_ptr mMetricEvent; +}; + +APSARA_UNIT_TEST_CASE(SelfMonitorMetricEventUnittest, TestCreateFromMetricEvent, 0); +APSARA_UNIT_TEST_CASE(SelfMonitorMetricEventUnittest, TestCreateFromGoMetricMap, 1); +APSARA_UNIT_TEST_CASE(SelfMonitorMetricEventUnittest, TestMerge, 2); +APSARA_UNIT_TEST_CASE(SelfMonitorMetricEventUnittest, TestSendInterval, 3); + +void SelfMonitorMetricEventUnittest::TestCreateFromMetricEvent() { + std::vector> labels; + labels.emplace_back(std::make_pair("plugin_type", "input_file")); + labels.emplace_back(std::make_pair("plugin_id", "1")); + labels.emplace_back(std::make_pair("pipeline_name", "pipeline_test")); + labels.emplace_back(std::make_pair("project", "project_a")); + + MetricsRecord* pluginMetric = new MetricsRecord(MetricCategory::METRIC_CATEGORY_PLUGIN, + std::make_shared(labels), + std::make_shared()); + + CounterPtr outSizeBytes = pluginMetric->CreateCounter("out_size_bytes"); + outSizeBytes->Add(100); + CounterPtr outEventTotal = pluginMetric->CreateCounter("out_event_total"); + outEventTotal->Add(1024); + IntGaugePtr monitorFileTotal = pluginMetric->CreateIntGauge("monitor_file_total"); + monitorFileTotal->Set(10); + + SelfMonitorMetricEvent event(pluginMetric); + + APSARA_TEST_EQUAL(MetricCategory::METRIC_CATEGORY_PLUGIN, event.mCategory); + APSARA_TEST_EQUAL(4U, event.mLabels.size()); + APSARA_TEST_EQUAL("input_file", event.mLabels["plugin_type"]); + APSARA_TEST_EQUAL("1", event.mLabels["plugin_id"]); + APSARA_TEST_EQUAL("pipeline_test", event.mLabels["pipeline_name"]); + APSARA_TEST_EQUAL("project_a", event.mLabels["project"]); + APSARA_TEST_EQUAL(2U, event.mCounters.size()); + APSARA_TEST_EQUAL(100U, event.mCounters["out_size_bytes"]); + APSARA_TEST_EQUAL(1024U, event.mCounters["out_event_total"]); + APSARA_TEST_EQUAL(1U, event.mGauges.size()); + APSARA_TEST_EQUAL(10, event.mGauges["monitor_file_total"]); + + delete pluginMetric; +} + +void SelfMonitorMetricEventUnittest::TestCreateFromGoMetricMap() { + std::map pluginMetric; + pluginMetric["labels"] = R"( + { + "metric_category":"plugin", + "plugin_type":"input_file", + "plugin_id":"1", + "pipeline_name":"pipeline_test", + "project":"project_a" + } + )"; + pluginMetric["counters"] = R"( + { + "out_size_bytes": "100", + "out_event_total": "1024" + } + )"; + pluginMetric["gauges"] = R"( + { + "monitor_file_total": "10" + } + )"; + SelfMonitorMetricEvent event(pluginMetric); + + APSARA_TEST_EQUAL(MetricCategory::METRIC_CATEGORY_PLUGIN, event.mCategory); + APSARA_TEST_EQUAL(4U, event.mLabels.size()); + APSARA_TEST_EQUAL("input_file", event.mLabels["plugin_type"]); + APSARA_TEST_EQUAL("1", event.mLabels["plugin_id"]); + APSARA_TEST_EQUAL("pipeline_test", event.mLabels["pipeline_name"]); + APSARA_TEST_EQUAL("project_a", event.mLabels["project"]); + APSARA_TEST_EQUAL(2U, event.mCounters.size()); + APSARA_TEST_EQUAL(100U, event.mCounters["out_size_bytes"]); + APSARA_TEST_EQUAL(1024U, event.mCounters["out_event_total"]); + APSARA_TEST_EQUAL(1U, event.mGauges.size()); + APSARA_TEST_EQUAL(10, event.mGauges["monitor_file_total"]); +} + +void SelfMonitorMetricEventUnittest::TestMerge() { + { + SelfMonitorMetricEvent event1; + SelfMonitorMetricEvent event2; + + // 初始化 event1 和 event2 + event1.mCounters["counter1"] = 100; + event1.mGauges["gauge1"] = 1.5; + event2.mCounters["counter1"] = 200; + event2.mGauges["gauge1"] = 2.5; + + event1.mUpdatedFlag = false; + event2.mUpdatedFlag = true; + + event1.Merge(event2); + + // 检验是否正确合并 + APSARA_TEST_EQUAL(300, event1.mCounters["counter1"]); + APSARA_TEST_EQUAL(2.5, event1.mGauges["gauge1"]); + APSARA_TEST_TRUE(event1.mUpdatedFlag); + } + // 含有不重叠键值的情况 + { + SelfMonitorMetricEvent event1; + SelfMonitorMetricEvent event2; + + // 初始化 event1 和 event2 + event1.mCounters["counter1"] = 100; + event2.mCounters["counter2"] = 200; + event1.mGauges["gauge1"] = 1.5; + event2.mGauges["gauge2"] = 2.5; + + event1.Merge(event2); + + // 检验是否正确合并 + APSARA_TEST_EQUAL(100, event1.mCounters["counter1"]); + APSARA_TEST_EQUAL(200, event1.mCounters["counter2"]); + APSARA_TEST_EQUAL(1.5, event1.mGauges["gauge1"]); + APSARA_TEST_EQUAL(2.5, event1.mGauges["gauge2"]); + } + // 不同发送间隔 + { + SelfMonitorMetricEvent event1; + SelfMonitorMetricEvent event2; + + event1.SetInterval(5); + event2.SetInterval(10); + + event1.mCounters["counter1"] = 100; + event2.mCounters["counter1"] = 200; + + event1.Merge(event2); + + // 检验间隔是否被设置为 event2 的间隔 + APSARA_TEST_EQUAL(0, event1.mLastSendInterval); + APSARA_TEST_EQUAL(10, event1.mSendInterval); + // 检验计数器是否正确合并 + APSARA_TEST_EQUAL(300, event1.mCounters["counter1"]); + } +} + +void SelfMonitorMetricEventUnittest::TestSendInterval() { + SelfMonitorMetricEvent event; + mSourceBuffer.reset(new SourceBuffer); + mEventGroup.reset(new PipelineEventGroup(mSourceBuffer)); + mMetricEvent = mEventGroup->CreateMetricEvent(); + + event.mUpdatedFlag = true; + event.SetInterval(3); + APSARA_TEST_FALSE(event.ShouldSend()); + APSARA_TEST_FALSE(event.ShouldDelete()); + APSARA_TEST_FALSE(event.ShouldSend()); // 模拟两次调用,间隔计数为2 + APSARA_TEST_FALSE(event.ShouldDelete()); + APSARA_TEST_TRUE(event.ShouldSend()); // 第三次调用,间隔计数达到3,应返回true + APSARA_TEST_FALSE(event.ShouldDelete()); + event.ReadAsMetricEvent(mMetricEvent.get()); + APSARA_TEST_FALSE(event.ShouldDelete()); + + event.mUpdatedFlag = false; + APSARA_TEST_FALSE(event.ShouldSend()); + APSARA_TEST_FALSE(event.ShouldDelete()); + APSARA_TEST_FALSE(event.ShouldSend()); + APSARA_TEST_FALSE(event.ShouldDelete()); + APSARA_TEST_FALSE(event.ShouldSend()); + APSARA_TEST_TRUE(event.ShouldDelete()); // 第三次调用,间隔计数达到3,应返回true +} + +} // namespace logtail + +int main(int argc, char** argv) { + logtail::Logger::Instance().InitGlobalLoggers(); + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} \ No newline at end of file