From 7af6044a6a5dc8e97cd1886e22f64629370739c6 Mon Sep 17 00:00:00 2001 From: liqiang Date: Mon, 6 Jan 2025 08:31:27 +0000 Subject: [PATCH 01/48] feat: prom agent basic info and targets info --- core/monitor/Monitor.cpp | 2 ++ core/monitor/Monitor.h | 5 ++++ core/prometheus/Constants.h | 9 +++++++ core/prometheus/PrometheusInputRunner.cpp | 19 ++++++++++++++ core/prometheus/PrometheusInputRunner.h | 12 +++++++++ .../prometheus/schedulers/ScrapeScheduler.cpp | 12 ++++----- core/prometheus/schedulers/ScrapeScheduler.h | 6 ++--- .../schedulers/TargetSubscriberScheduler.cpp | 25 +++++++++++++++++- .../schedulers/TargetSubscriberScheduler.h | 3 ++- .../TargetSubscriberSchedulerUnittest.cpp | 26 +++++++++++++++++-- 10 files changed, 105 insertions(+), 14 deletions(-) diff --git a/core/monitor/Monitor.cpp b/core/monitor/Monitor.cpp index 7e4f37bcd0..d55455e331 100644 --- a/core/monitor/Monitor.cpp +++ b/core/monitor/Monitor.cpp @@ -177,6 +177,8 @@ void LogtailMonitor::Monitor() { GetMemStat(); CalCpuStat(curCpuStat, mCpuStat); + mCpuUsage.store(mCpuStat.mCpuUsage); + mMemoryUsage.store(mMemStat.mRss); if (CheckHardMemLimit()) { LOG_ERROR(sLogger, ("Resource used by program exceeds hard limit", diff --git a/core/monitor/Monitor.h b/core/monitor/Monitor.h index 4b5e830099..d266a390a9 100644 --- a/core/monitor/Monitor.h +++ b/core/monitor/Monitor.h @@ -93,6 +93,8 @@ class LogtailMonitor { // GetRealtimeCpuLevel return a value to indicates current CPU usage level. // LogInput use it to do flow control. float GetRealtimeCpuLevel() { return mRealtimeCpuStat.mCpuUsage / mScaledCpuUsageUpLimit; } + [[nodiscard]] float GetCpuUsage() const { return mCpuUsage.load(); } + [[nodiscard]] float GetMemoryUsage() const { return mMemoryUsage.load(); } private: LogtailMonitor(); @@ -162,6 +164,9 @@ class LogtailMonitor { // Memory usage statistics. MemStat mMemStat; + std::atomic mCpuUsage = 0; + std::atomic mMemoryUsage = 0; + // Current scale up level, updated by CheckScaledCpuUsageUpLimit. float mScaledCpuUsageUpLimit; #if defined(__linux__) diff --git a/core/prometheus/Constants.h b/core/prometheus/Constants.h index f0be5177ac..58ef9d7174 100644 --- a/core/prometheus/Constants.h +++ b/core/prometheus/Constants.h @@ -117,4 +117,13 @@ const char* const ACCEPT_ENCODING = "Accept-Encoding"; const char* const GZIP = "gzip"; const char* const IDENTITY = "identity"; +const char* const AGENT_INFO = "AgentInfo"; +const char* const TARGETS_INFO = "TargetsInfo"; +const char* const CPU_LIMIT = "CpuLimit"; +const char* const CPU_USAGE = "CpuUsage"; +const char* const MEM_LIMIT = "MemLimit"; +const char* const MEM_USAGE = "MemUsage"; +const char* const HASH = "Hash"; +const char* const SERIES = "Series"; + } // namespace logtail::prometheus diff --git a/core/prometheus/PrometheusInputRunner.cpp b/core/prometheus/PrometheusInputRunner.cpp index f3686ab3c7..f28482ecad 100644 --- a/core/prometheus/PrometheusInputRunner.cpp +++ b/core/prometheus/PrometheusInputRunner.cpp @@ -31,6 +31,7 @@ #include "common/http/Curl.h" #include "common/timer/Timer.h" #include "logger/Logger.h" +#include "monitor/Monitor.h" #include "monitor/metric_constants/MetricConstants.h" #include "plugin/flusher/sls/FlusherSLS.h" #include "prometheus/Constants.h" @@ -51,6 +52,7 @@ PrometheusInputRunner::PrometheusInputRunner() mEventPool(true), mUnRegisterMs(0) { mTimer = std::make_shared(); + mLastUpdateTime = std::chrono::steady_clock::now(); // self monitor MetricLabels labels; @@ -293,4 +295,21 @@ string PrometheusInputRunner::GetAllProjects() { void PrometheusInputRunner::CheckGC() { mEventPool.CheckGC(); } + +PromAgentInfo PrometheusInputRunner::GetAgentInfo() { + std::lock_guard lock(mAgentInfoMutex); + auto curTime = std::chrono::steady_clock::now(); +#ifdef APSARA_UNIT_TEST_MAIN + curTime += std::chrono::seconds(prometheus::RefeshIntervalSeconds); +#endif + if (curTime - mLastUpdateTime >= std::chrono::seconds(prometheus::RefeshIntervalSeconds)) { + mLastUpdateTime = curTime; + mAgentInfo.mCpuUsage = LogtailMonitor::GetInstance()->GetCpuUsage(); + mAgentInfo.mMemUsage = LogtailMonitor::GetInstance()->GetMemoryUsage(); + mAgentInfo.mCpuLimit = AppConfig::GetInstance()->GetCpuUsageUpLimit(); + mAgentInfo.mMemLimit = AppConfig::GetInstance()->GetMemUsageUpLimit(); + } + + return mAgentInfo; +} }; // namespace logtail \ No newline at end of file diff --git a/core/prometheus/PrometheusInputRunner.h b/core/prometheus/PrometheusInputRunner.h index 8983bdd4d8..3ce5e63285 100644 --- a/core/prometheus/PrometheusInputRunner.h +++ b/core/prometheus/PrometheusInputRunner.h @@ -30,6 +30,13 @@ namespace logtail { +struct PromAgentInfo { + float mCpuUsage; + float mMemUsage; + float mCpuLimit; + float mMemLimit; +}; + class PrometheusInputRunner : public InputRunner { public: PrometheusInputRunner(const PrometheusInputRunner&) = delete; @@ -42,6 +49,7 @@ class PrometheusInputRunner : public InputRunner { return &sInstance; } void CheckGC(); + PromAgentInfo GetAgentInfo(); // input plugin update void UpdateScrapeInput(std::shared_ptr targetSubscriber, @@ -74,6 +82,10 @@ class PrometheusInputRunner : public InputRunner { int32_t mServicePort; std::string mPodName; + std::mutex mAgentInfoMutex; + PromAgentInfo mAgentInfo{0, 0, 0, 0}; + std::chrono::steady_clock::time_point mLastUpdateTime; + std::shared_ptr mTimer; EventPool mEventPool; diff --git a/core/prometheus/schedulers/ScrapeScheduler.cpp b/core/prometheus/schedulers/ScrapeScheduler.cpp index e345d7b1b5..3226f4244e 100644 --- a/core/prometheus/schedulers/ScrapeScheduler.cpp +++ b/core/prometheus/schedulers/ScrapeScheduler.cpp @@ -49,7 +49,8 @@ ScrapeScheduler::ScrapeScheduler(std::shared_ptr scrapeConfigPtr, mScrapeConfigPtr(std::move(scrapeConfigPtr)), mHost(std::move(host)), mPort(port), - mQueueKey(queueKey) { + mQueueKey(queueKey), + mScrapeSamplesScraped(0) { string tmpTargetURL = mScrapeConfigPtr->mScheme + "://" + mHost + ":" + ToString(mPort) + mScrapeConfigPtr->mMetricsPath + (mScrapeConfigPtr->mQueryString.empty() ? "" : "?" + mScrapeConfigPtr->mQueryString); @@ -83,20 +84,19 @@ void ScrapeScheduler::OnMetricResult(HttpResponse& response, uint64_t) { scrapeState = prom::NetworkCodeToState(NetworkCode::Ok); } - mScrapeDurationSeconds = scrapeDurationMilliSeconds * sRate; - mUpState = response.GetStatusCode() == 200; if (response.GetStatusCode() != 200) { LOG_WARNING(sLogger, ("scrape failed, status code", response.GetStatusCode())("target", mHash)("curl msg", response.GetNetworkStatus().mMessage)); } - auto mScrapeDurationSeconds = scrapeDurationMilliSeconds * sRate; - auto mUpState = response.GetStatusCode() == 200; + auto scrapeDurationSeconds = scrapeDurationMilliSeconds * sRate; + auto upState = response.GetStatusCode() == 200; mPromStreamScraper.mStreamIndex++; mPromStreamScraper.FlushCache(); - mPromStreamScraper.SetAutoMetricMeta(mScrapeDurationSeconds, mUpState, scrapeState); + mPromStreamScraper.SetAutoMetricMeta(scrapeDurationSeconds, upState, scrapeState); mPromStreamScraper.SendMetrics(); + mScrapeSamplesScraped = mPromStreamScraper.mScrapeSamplesScraped; mPromStreamScraper.Reset(); mPluginTotalDelayMs->Add(scrapeDurationMilliSeconds); diff --git a/core/prometheus/schedulers/ScrapeScheduler.h b/core/prometheus/schedulers/ScrapeScheduler.h index 551ada0da9..fc2309de6c 100644 --- a/core/prometheus/schedulers/ScrapeScheduler.h +++ b/core/prometheus/schedulers/ScrapeScheduler.h @@ -49,6 +49,7 @@ class ScrapeScheduler : public BaseScheduler { std::string GetId() const; void SetComponent(std::shared_ptr timer, EventPool* eventPool); + uint64_t GetLastScrapeTimeSeries() const { return mScrapeSamplesScraped; } void ScheduleNext() override; void ScrapeOnce(std::chrono::steady_clock::time_point execTime); @@ -70,10 +71,7 @@ class ScrapeScheduler : public BaseScheduler { QueueKey mQueueKey; // auto metrics - uint64_t mScrapeTimestampMilliSec = 0; - double mScrapeDurationSeconds = 0; - uint64_t mScrapeResponseSizeBytes = 0; - bool mUpState = true; + std::atomic_uint64_t mScrapeSamplesScraped; // self monitor std::shared_ptr mSelfMonitor; diff --git a/core/prometheus/schedulers/TargetSubscriberScheduler.cpp b/core/prometheus/schedulers/TargetSubscriberScheduler.cpp index f3f7647adc..be2f26fa7e 100644 --- a/core/prometheus/schedulers/TargetSubscriberScheduler.cpp +++ b/core/prometheus/schedulers/TargetSubscriberScheduler.cpp @@ -30,6 +30,7 @@ #include "logger/Logger.h" #include "monitor/metric_constants/MetricConstants.h" #include "prometheus/Constants.h" +#include "prometheus/PrometheusInputRunner.h" #include "prometheus/Utils.h" #include "prometheus/async/PromFuture.h" #include "prometheus/async/PromHttpRequest.h" @@ -296,6 +297,8 @@ TargetSubscriberScheduler::BuildSubscriberTimerEvent(std::chrono::steady_clock:: if (!mETag.empty()) { httpHeader[prometheus::IF_NONE_MATCH] = mETag; } + auto body = TargetsInfoToString(); + LOG_INFO(sLogger, ("body", body)); auto request = std::make_unique(HTTP_GET, false, mServiceHost, @@ -303,7 +306,7 @@ TargetSubscriberScheduler::BuildSubscriberTimerEvent(std::chrono::steady_clock:: "/jobs/" + URLEncode(GetId()) + "/targets", "collector_id=" + mPodName, httpHeader, - "", + body, HttpResponse(), prometheus::RefeshIntervalSeconds, 1, @@ -313,6 +316,26 @@ TargetSubscriberScheduler::BuildSubscriberTimerEvent(std::chrono::steady_clock:: return timerEvent; } +string TargetSubscriberScheduler::TargetsInfoToString() const { + Json::Value root; + root[prometheus::JOB_NAME] = mJobName; + auto agentInfo = PrometheusInputRunner::GetInstance()->GetAgentInfo(); + root[prometheus::AGENT_INFO][prometheus::CPU_USAGE] = agentInfo.mCpuUsage; + root[prometheus::AGENT_INFO][prometheus::CPU_LIMIT] = agentInfo.mCpuLimit; + root[prometheus::AGENT_INFO][prometheus::MEM_USAGE] = agentInfo.mMemUsage; + root[prometheus::AGENT_INFO][prometheus::MEM_LIMIT] = agentInfo.mMemLimit; + { + ReadLock lock(mRWLock); + for (const auto& [k, v] : mScrapeSchedulerMap) { + Json::Value targetInfo; + targetInfo[prometheus::HASH] = v->GetId(); + targetInfo[prometheus::SERIES] = v->GetLastScrapeTimeSeries(); + root[prometheus::TARGETS_INFO].append(targetInfo); + } + } + return root.toStyledString(); +} + void TargetSubscriberScheduler::CancelAllScrapeScheduler() { ReadLock lock(mRWLock); for (const auto& [k, v] : mScrapeSchedulerMap) { diff --git a/core/prometheus/schedulers/TargetSubscriberScheduler.h b/core/prometheus/schedulers/TargetSubscriberScheduler.h index 1ea5db03de..7e89f07508 100644 --- a/core/prometheus/schedulers/TargetSubscriberScheduler.h +++ b/core/prometheus/schedulers/TargetSubscriberScheduler.h @@ -69,13 +69,14 @@ class TargetSubscriberScheduler : public BaseScheduler { BuildScrapeSchedulerSet(std::vector& scrapeSchedulerGroup); std::unique_ptr BuildSubscriberTimerEvent(std::chrono::steady_clock::time_point execTime); + std::string TargetsInfoToString() const; void UpdateScrapeScheduler(std::unordered_map>&); void CancelAllScrapeScheduler(); std::shared_ptr mScrapeConfigPtr; - ReadWriteLock mRWLock; + mutable ReadWriteLock mRWLock; std::unordered_map> mScrapeSchedulerMap; std::string mJobName; diff --git a/core/unittest/prometheus/TargetSubscriberSchedulerUnittest.cpp b/core/unittest/prometheus/TargetSubscriberSchedulerUnittest.cpp index 58544f1c22..27374598fe 100644 --- a/core/unittest/prometheus/TargetSubscriberSchedulerUnittest.cpp +++ b/core/unittest/prometheus/TargetSubscriberSchedulerUnittest.cpp @@ -14,13 +14,14 @@ * limitations under the License. */ +#include + #include #include #include -#include "json/json.h" - #include "common/JsonUtil.h" +#include "prometheus/Constants.h" #include "prometheus/labels/Labels.h" #include "prometheus/schedulers/TargetSubscriberScheduler.h" #include "unittest/Unittest.h" @@ -35,6 +36,7 @@ class TargetSubscriberSchedulerUnittest : public ::testing::Test { void TestProcess(); void TestParseTargetGroups(); void TestBuildScrapeSchedulerSet(); + void TestTargetsInfoToString(); protected: void SetUp() override { @@ -215,10 +217,30 @@ void TargetSubscriberSchedulerUnittest::TestBuildScrapeSchedulerSet() { APSARA_TEST_NOT_EQUAL(startTimeList[0].second, startTimeList[2].second); } +void TargetSubscriberSchedulerUnittest::TestTargetsInfoToString() { + std::shared_ptr targetSubscriber = std::make_shared(); + auto metricLabels = MetricLabels(); + APSARA_TEST_TRUE(targetSubscriber->Init(mConfig["ScrapeConfig"])); + targetSubscriber->InitSelfMonitor(metricLabels); + + // if status code is 200 + mHttpResponse.SetStatusCode(200); + targetSubscriber->OnSubscription(mHttpResponse, 0); + APSARA_TEST_EQUAL(3UL, targetSubscriber->mScrapeSchedulerMap.size()); + + auto res = targetSubscriber->TargetsInfoToString(); + string errorMsg; + Json::Value data; + ParseJsonTable(res, data, errorMsg); + APSARA_TEST_EQUAL(2.0, data[prometheus::AGENT_INFO][prometheus::CPU_LIMIT].asFloat()); + APSARA_TEST_EQUAL((uint64_t)3, data[prometheus::TARGETS_INFO].size()); +} + UNIT_TEST_CASE(TargetSubscriberSchedulerUnittest, OnInitScrapeJobEvent) UNIT_TEST_CASE(TargetSubscriberSchedulerUnittest, TestProcess) UNIT_TEST_CASE(TargetSubscriberSchedulerUnittest, TestParseTargetGroups) UNIT_TEST_CASE(TargetSubscriberSchedulerUnittest, TestBuildScrapeSchedulerSet) +UNIT_TEST_CASE(TargetSubscriberSchedulerUnittest, TestTargetsInfoToString) } // namespace logtail From 39c7284d7060ab1326063b87057e7b95c8995c40 Mon Sep 17 00:00:00 2001 From: liqiang Date: Mon, 6 Jan 2025 09:01:47 +0000 Subject: [PATCH 02/48] feat: prom agent info basic health value --- core/prometheus/Constants.h | 1 + core/prometheus/PrometheusInputRunner.cpp | 28 +++++++++++++++++++ core/prometheus/PrometheusInputRunner.h | 1 + .../schedulers/TargetSubscriberScheduler.cpp | 1 + .../TargetSubscriberSchedulerUnittest.cpp | 1 + 5 files changed, 32 insertions(+) diff --git a/core/prometheus/Constants.h b/core/prometheus/Constants.h index 58ef9d7174..b97d098992 100644 --- a/core/prometheus/Constants.h +++ b/core/prometheus/Constants.h @@ -125,5 +125,6 @@ const char* const MEM_LIMIT = "MemLimit"; const char* const MEM_USAGE = "MemUsage"; const char* const HASH = "Hash"; const char* const SERIES = "Series"; +const char* const HEALTH = "Health"; } // namespace logtail::prometheus diff --git a/core/prometheus/PrometheusInputRunner.cpp b/core/prometheus/PrometheusInputRunner.cpp index f28482ecad..e84ee0e29a 100644 --- a/core/prometheus/PrometheusInputRunner.cpp +++ b/core/prometheus/PrometheusInputRunner.cpp @@ -33,6 +33,7 @@ #include "logger/Logger.h" #include "monitor/Monitor.h" #include "monitor/metric_constants/MetricConstants.h" +#include "pipeline/queue/ProcessQueueManager.h" #include "plugin/flusher/sls/FlusherSLS.h" #include "prometheus/Constants.h" #include "prometheus/Utils.h" @@ -308,6 +309,33 @@ PromAgentInfo PrometheusInputRunner::GetAgentInfo() { mAgentInfo.mMemUsage = LogtailMonitor::GetInstance()->GetMemoryUsage(); mAgentInfo.mCpuLimit = AppConfig::GetInstance()->GetCpuUsageUpLimit(); mAgentInfo.mMemLimit = AppConfig::GetInstance()->GetMemUsageUpLimit(); + + int queueNums = 0; + int validToPushNums = 0; + + { + ReadLock lock(mSubscriberMapRWLock); + queueNums = mTargetSubscriberSchedulerMap.size(); + for (auto& [k, v] : mTargetSubscriberSchedulerMap) { + if (ProcessQueueManager::GetInstance()->IsValidToPush(v->mQueueKey)) { + validToPushNums++; + } + } + } + mAgentInfo.mHealth = 0; + if (mAgentInfo.mCpuLimit > 0.0) { + mAgentInfo.mHealth += (1 - mAgentInfo.mCpuUsage / mAgentInfo.mCpuLimit); + } + if (mAgentInfo.mMemLimit > 0.0) { + mAgentInfo.mHealth += (1 - mAgentInfo.mMemUsage / mAgentInfo.mMemLimit); + } + if (queueNums > 0) { + mAgentInfo.mHealth += (1.0 * validToPushNums / queueNums); + } else { + mAgentInfo.mHealth += 1; + } + + mAgentInfo.mHealth /= 3; } return mAgentInfo; diff --git a/core/prometheus/PrometheusInputRunner.h b/core/prometheus/PrometheusInputRunner.h index 3ce5e63285..3789e45e74 100644 --- a/core/prometheus/PrometheusInputRunner.h +++ b/core/prometheus/PrometheusInputRunner.h @@ -35,6 +35,7 @@ struct PromAgentInfo { float mMemUsage; float mCpuLimit; float mMemLimit; + float mHealth; }; class PrometheusInputRunner : public InputRunner { diff --git a/core/prometheus/schedulers/TargetSubscriberScheduler.cpp b/core/prometheus/schedulers/TargetSubscriberScheduler.cpp index be2f26fa7e..7f46ccad0c 100644 --- a/core/prometheus/schedulers/TargetSubscriberScheduler.cpp +++ b/core/prometheus/schedulers/TargetSubscriberScheduler.cpp @@ -324,6 +324,7 @@ string TargetSubscriberScheduler::TargetsInfoToString() const { root[prometheus::AGENT_INFO][prometheus::CPU_LIMIT] = agentInfo.mCpuLimit; root[prometheus::AGENT_INFO][prometheus::MEM_USAGE] = agentInfo.mMemUsage; root[prometheus::AGENT_INFO][prometheus::MEM_LIMIT] = agentInfo.mMemLimit; + root[prometheus::AGENT_INFO][prometheus::HEALTH] = agentInfo.mHealth; { ReadLock lock(mRWLock); for (const auto& [k, v] : mScrapeSchedulerMap) { diff --git a/core/unittest/prometheus/TargetSubscriberSchedulerUnittest.cpp b/core/unittest/prometheus/TargetSubscriberSchedulerUnittest.cpp index 27374598fe..f108d2d92c 100644 --- a/core/unittest/prometheus/TargetSubscriberSchedulerUnittest.cpp +++ b/core/unittest/prometheus/TargetSubscriberSchedulerUnittest.cpp @@ -234,6 +234,7 @@ void TargetSubscriberSchedulerUnittest::TestTargetsInfoToString() { ParseJsonTable(res, data, errorMsg); APSARA_TEST_EQUAL(2.0, data[prometheus::AGENT_INFO][prometheus::CPU_LIMIT].asFloat()); APSARA_TEST_EQUAL((uint64_t)3, data[prometheus::TARGETS_INFO].size()); + APSARA_TEST_EQUAL(true, data[prometheus::AGENT_INFO][prometheus::HEALTH].asFloat() > 0.6); } UNIT_TEST_CASE(TargetSubscriberSchedulerUnittest, OnInitScrapeJobEvent) From 27a1849d9fc884862821bef655f56532bc1aded8 Mon Sep 17 00:00:00 2001 From: liqiang Date: Mon, 6 Jan 2025 09:11:49 +0000 Subject: [PATCH 03/48] chore: remove unnecessory logs --- core/prometheus/schedulers/TargetSubscriberScheduler.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/core/prometheus/schedulers/TargetSubscriberScheduler.cpp b/core/prometheus/schedulers/TargetSubscriberScheduler.cpp index 7f46ccad0c..d3cb4a944f 100644 --- a/core/prometheus/schedulers/TargetSubscriberScheduler.cpp +++ b/core/prometheus/schedulers/TargetSubscriberScheduler.cpp @@ -298,7 +298,6 @@ TargetSubscriberScheduler::BuildSubscriberTimerEvent(std::chrono::steady_clock:: httpHeader[prometheus::IF_NONE_MATCH] = mETag; } auto body = TargetsInfoToString(); - LOG_INFO(sLogger, ("body", body)); auto request = std::make_unique(HTTP_GET, false, mServiceHost, From bb81d0e087df409c8b92b4cfe9135cbea55ab6a5 Mon Sep 17 00:00:00 2001 From: liqiang Date: Wed, 8 Jan 2025 02:23:50 +0000 Subject: [PATCH 04/48] feat: use response size instead of series --- core/prometheus/Constants.h | 2 +- core/prometheus/schedulers/ScrapeScheduler.cpp | 4 ++-- core/prometheus/schedulers/ScrapeScheduler.h | 4 ++-- core/prometheus/schedulers/TargetSubscriberScheduler.cpp | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/core/prometheus/Constants.h b/core/prometheus/Constants.h index b97d098992..39211208f2 100644 --- a/core/prometheus/Constants.h +++ b/core/prometheus/Constants.h @@ -124,7 +124,7 @@ const char* const CPU_USAGE = "CpuUsage"; const char* const MEM_LIMIT = "MemLimit"; const char* const MEM_USAGE = "MemUsage"; const char* const HASH = "Hash"; -const char* const SERIES = "Series"; +const char* const SIZE = "Size"; const char* const HEALTH = "Health"; } // namespace logtail::prometheus diff --git a/core/prometheus/schedulers/ScrapeScheduler.cpp b/core/prometheus/schedulers/ScrapeScheduler.cpp index 3226f4244e..8f53c1f1dd 100644 --- a/core/prometheus/schedulers/ScrapeScheduler.cpp +++ b/core/prometheus/schedulers/ScrapeScheduler.cpp @@ -50,7 +50,7 @@ ScrapeScheduler::ScrapeScheduler(std::shared_ptr scrapeConfigPtr, mHost(std::move(host)), mPort(port), mQueueKey(queueKey), - mScrapeSamplesScraped(0) { + mScrapeResponseSizeBytes(0) { string tmpTargetURL = mScrapeConfigPtr->mScheme + "://" + mHost + ":" + ToString(mPort) + mScrapeConfigPtr->mMetricsPath + (mScrapeConfigPtr->mQueryString.empty() ? "" : "?" + mScrapeConfigPtr->mQueryString); @@ -96,7 +96,7 @@ void ScrapeScheduler::OnMetricResult(HttpResponse& response, uint64_t) { mPromStreamScraper.FlushCache(); mPromStreamScraper.SetAutoMetricMeta(scrapeDurationSeconds, upState, scrapeState); mPromStreamScraper.SendMetrics(); - mScrapeSamplesScraped = mPromStreamScraper.mScrapeSamplesScraped; + mScrapeResponseSizeBytes = mPromStreamScraper.mRawSize; mPromStreamScraper.Reset(); mPluginTotalDelayMs->Add(scrapeDurationMilliSeconds); diff --git a/core/prometheus/schedulers/ScrapeScheduler.h b/core/prometheus/schedulers/ScrapeScheduler.h index fc2309de6c..e5ee5e3b6d 100644 --- a/core/prometheus/schedulers/ScrapeScheduler.h +++ b/core/prometheus/schedulers/ScrapeScheduler.h @@ -49,7 +49,7 @@ class ScrapeScheduler : public BaseScheduler { std::string GetId() const; void SetComponent(std::shared_ptr timer, EventPool* eventPool); - uint64_t GetLastScrapeTimeSeries() const { return mScrapeSamplesScraped; } + uint64_t GetLastScrapeSize() const { return mScrapeResponseSizeBytes; } void ScheduleNext() override; void ScrapeOnce(std::chrono::steady_clock::time_point execTime); @@ -71,7 +71,7 @@ class ScrapeScheduler : public BaseScheduler { QueueKey mQueueKey; // auto metrics - std::atomic_uint64_t mScrapeSamplesScraped; + std::atomic_uint64_t mScrapeResponseSizeBytes; // self monitor std::shared_ptr mSelfMonitor; diff --git a/core/prometheus/schedulers/TargetSubscriberScheduler.cpp b/core/prometheus/schedulers/TargetSubscriberScheduler.cpp index d3cb4a944f..830c23524f 100644 --- a/core/prometheus/schedulers/TargetSubscriberScheduler.cpp +++ b/core/prometheus/schedulers/TargetSubscriberScheduler.cpp @@ -329,7 +329,7 @@ string TargetSubscriberScheduler::TargetsInfoToString() const { for (const auto& [k, v] : mScrapeSchedulerMap) { Json::Value targetInfo; targetInfo[prometheus::HASH] = v->GetId(); - targetInfo[prometheus::SERIES] = v->GetLastScrapeTimeSeries(); + targetInfo[prometheus::SIZE] = v->GetLastScrapeSize(); root[prometheus::TARGETS_INFO].append(targetInfo); } } From 78c78a8cd92433b34e79a8fcc69e8951ec3f1410 Mon Sep 17 00:00:00 2001 From: liqiang Date: Wed, 8 Jan 2025 02:50:42 +0000 Subject: [PATCH 05/48] chore: remove job name in targets info --- core/prometheus/schedulers/TargetSubscriberScheduler.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/core/prometheus/schedulers/TargetSubscriberScheduler.cpp b/core/prometheus/schedulers/TargetSubscriberScheduler.cpp index 830c23524f..e211627e3a 100644 --- a/core/prometheus/schedulers/TargetSubscriberScheduler.cpp +++ b/core/prometheus/schedulers/TargetSubscriberScheduler.cpp @@ -317,7 +317,6 @@ TargetSubscriberScheduler::BuildSubscriberTimerEvent(std::chrono::steady_clock:: string TargetSubscriberScheduler::TargetsInfoToString() const { Json::Value root; - root[prometheus::JOB_NAME] = mJobName; auto agentInfo = PrometheusInputRunner::GetInstance()->GetAgentInfo(); root[prometheus::AGENT_INFO][prometheus::CPU_USAGE] = agentInfo.mCpuUsage; root[prometheus::AGENT_INFO][prometheus::CPU_LIMIT] = agentInfo.mCpuLimit; From 4399c2b4972a2768e0a50ec234d9800196af5cfc Mon Sep 17 00:00:00 2001 From: liqiang Date: Wed, 8 Jan 2025 13:35:47 +0000 Subject: [PATCH 06/48] feat: update labels hash calc --- core/prometheus/labels/Labels.cpp | 24 +++++++++++++++---- .../prometheus/schedulers/ScrapeScheduler.cpp | 7 +++--- .../prometheus/ScrapeSchedulerUnittest.cpp | 9 ++++--- 3 files changed, 29 insertions(+), 11 deletions(-) diff --git a/core/prometheus/labels/Labels.cpp b/core/prometheus/labels/Labels.cpp index f701cabd82..60eb388ff7 100644 --- a/core/prometheus/labels/Labels.cpp +++ b/core/prometheus/labels/Labels.cpp @@ -111,10 +111,26 @@ void Labels::Range(const std::function& uint64_t Labels::Hash() { string hash; uint64_t sum = prometheus::OFFSET64; - Range([&hash](const string& k, const string& v) { hash += k + "\xff" + v + "\xff"; }); - for (auto i : hash) { - sum ^= (uint64_t)i; - sum *= prometheus::PRIME64; + vector names; + Range([&names](const string& k, const string&) { names.push_back(k); }); + sort(names.begin(), names.end()); + auto calc = [](uint64_t h, uint64_t c) { + h ^= (uint64_t)c; + h *= prometheus::PRIME64; + return h; + }; + auto calcString = [](uint64_t h, const string& s) { + for (auto c : s) { + h ^= (uint64_t)c; + h *= prometheus::PRIME64; + } + return h; + }; + for (const auto& name : names) { + sum = calcString(sum, name); + sum = calc(sum, 255); + sum = calcString(sum, Get(name)); + sum = calc(sum, 255); } return sum; } diff --git a/core/prometheus/schedulers/ScrapeScheduler.cpp b/core/prometheus/schedulers/ScrapeScheduler.cpp index 8f53c1f1dd..545a18ce6c 100644 --- a/core/prometheus/schedulers/ScrapeScheduler.cpp +++ b/core/prometheus/schedulers/ScrapeScheduler.cpp @@ -51,11 +51,10 @@ ScrapeScheduler::ScrapeScheduler(std::shared_ptr scrapeConfigPtr, mPort(port), mQueueKey(queueKey), mScrapeResponseSizeBytes(0) { - string tmpTargetURL = mScrapeConfigPtr->mScheme + "://" + mHost + ":" + ToString(mPort) - + mScrapeConfigPtr->mMetricsPath - + (mScrapeConfigPtr->mQueryString.empty() ? "" : "?" + mScrapeConfigPtr->mQueryString); - mHash = mScrapeConfigPtr->mJobName + tmpTargetURL + ToString(labels.Hash()); mInstance = mHost + ":" + ToString(mPort); + std::ostringstream hash; + hash << std::setw(16) << std::setfill('0') << std::hex << labels.Hash(); + mHash = mScrapeConfigPtr->mJobName + mInstance + hash.str(); mInterval = mScrapeConfigPtr->mScrapeIntervalSeconds; mPromStreamScraper.mHash = mHash; diff --git a/core/unittest/prometheus/ScrapeSchedulerUnittest.cpp b/core/unittest/prometheus/ScrapeSchedulerUnittest.cpp index 1eb84623a3..f40252b911 100644 --- a/core/unittest/prometheus/ScrapeSchedulerUnittest.cpp +++ b/core/unittest/prometheus/ScrapeSchedulerUnittest.cpp @@ -61,8 +61,11 @@ class ScrapeSchedulerUnittest : public testing::Test { void ScrapeSchedulerUnittest::TestInitscrapeScheduler() { Labels labels; labels.Set(prometheus::ADDRESS_LABEL_NAME, "localhost:8080"); + labels.Set("testb", "valueb"); + labels.Set("testa", "localhost:8080"); ScrapeScheduler event(mScrapeConfig, "localhost", 8080, labels, 0, 0); - APSARA_TEST_EQUAL(event.GetId(), "test_jobhttp://localhost:8080/metrics" + ToString(labels.Hash())); + + APSARA_TEST_EQUAL(event.GetId(), "test_joblocalhost:8080887d0db7cce49fc7"); } void ScrapeSchedulerUnittest::TestProcess() { @@ -76,7 +79,7 @@ void ScrapeSchedulerUnittest::TestProcess() { = HttpResponse(&event.mPromStreamScraper, [](void*) {}, prom::StreamScraper::MetricWriteCallback); auto defaultLabels = MetricLabels(); event.InitSelfMonitor(defaultLabels); - APSARA_TEST_EQUAL(event.GetId(), "test_jobhttp://localhost:8080/metrics" + ToString(labels.Hash())); + // APSARA_TEST_EQUAL(event.GetId(), "test_jobhttp://localhost:8080/metrics" + ToString(labels.Hash())); // if status code is not 200, no data will be processed // but will continue running, sending self-monitoring metrics httpResponse.SetStatusCode(503); @@ -134,7 +137,7 @@ void ScrapeSchedulerUnittest::TestStreamMetricWriteCallback() { ScrapeScheduler event(mScrapeConfig, "localhost", 8080, labels, 0, 0); HttpResponse httpResponse = HttpResponse(&event.mPromStreamScraper, [](void*) {}, prom::StreamScraper::MetricWriteCallback); - APSARA_TEST_EQUAL(event.GetId(), "test_jobhttp://localhost:8080/metrics" + ToString(labels.Hash())); + // APSARA_TEST_EQUAL(event.GetId(), "test_jobhttp://localhost:8080/metrics" + ToString(labels.Hash())); string body1 = "# HELP go_gc_duration_seconds A summary of the pause duration of garbage collection cycles.\n" "# TYPE go_gc_duration_seconds summary\n" From f479d2d8993a91c9989b339a28c90a32e8a12276 Mon Sep 17 00:00:00 2001 From: liqiang Date: Thu, 9 Jan 2025 02:21:12 +0000 Subject: [PATCH 07/48] feat: update instance calc --- core/prometheus/component/StreamScraper.h | 11 +++++++++-- core/prometheus/schedulers/ScrapeScheduler.cpp | 2 +- .../schedulers/TargetSubscriberScheduler.cpp | 3 --- 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/core/prometheus/component/StreamScraper.h b/core/prometheus/component/StreamScraper.h index 3f3b9ab45d..b40202ee3e 100644 --- a/core/prometheus/component/StreamScraper.h +++ b/core/prometheus/component/StreamScraper.h @@ -4,8 +4,10 @@ #include #include "Labels.h" +#include "StringTools.h" #include "models/PipelineEventGroup.h" #include "pipeline/queue/QueueKey.h" +#include "prometheus/Constants.h" #ifdef APSARA_UNIT_TEST_MAIN #include @@ -16,11 +18,16 @@ namespace logtail::prom { class StreamScraper { public: - StreamScraper(Labels labels, QueueKey queueKey, size_t inputIndex) + StreamScraper(Labels labels, QueueKey queueKey, size_t inputIndex, const std::string& host, int32_t port) : mEventGroup(PipelineEventGroup(std::make_shared())), mQueueKey(queueKey), mInputIndex(inputIndex), - mTargetLabels(std::move(labels)) {} + mTargetLabels(std::move(labels)) { + auto instance = host + ":" + ToString(port); + if (mTargetLabels.Get(prometheus::INSTANCE).empty()) { + mTargetLabels.Set(prometheus::INSTANCE, instance); + } + } static size_t MetricWriteCallback(char* buffer, size_t size, size_t nmemb, void* data); void FlushCache(); diff --git a/core/prometheus/schedulers/ScrapeScheduler.cpp b/core/prometheus/schedulers/ScrapeScheduler.cpp index 545a18ce6c..ef4d344938 100644 --- a/core/prometheus/schedulers/ScrapeScheduler.cpp +++ b/core/prometheus/schedulers/ScrapeScheduler.cpp @@ -45,7 +45,7 @@ ScrapeScheduler::ScrapeScheduler(std::shared_ptr scrapeConfigPtr, Labels labels, QueueKey queueKey, size_t inputIndex) - : mPromStreamScraper(labels, queueKey, inputIndex), + : mPromStreamScraper(labels, queueKey, inputIndex, host, port), mScrapeConfigPtr(std::move(scrapeConfigPtr)), mHost(std::move(host)), mPort(port), diff --git a/core/prometheus/schedulers/TargetSubscriberScheduler.cpp b/core/prometheus/schedulers/TargetSubscriberScheduler.cpp index e211627e3a..5ced761b03 100644 --- a/core/prometheus/schedulers/TargetSubscriberScheduler.cpp +++ b/core/prometheus/schedulers/TargetSubscriberScheduler.cpp @@ -206,9 +206,6 @@ TargetSubscriberScheduler::BuildScrapeSchedulerSet(std::vector& targetGr } string address = resultLabel.Get(prometheus::ADDRESS_LABEL_NAME); - if (resultLabel.Get(prometheus::INSTANCE).empty()) { - resultLabel.Set(prometheus::INSTANCE, address); - } auto m = address.find(':'); if (m == string::npos) { From f9ac8bcbaed2eb071c694b370f6bfdf9df11b6f0 Mon Sep 17 00:00:00 2001 From: liqiang Date: Thu, 9 Jan 2025 10:31:37 +0800 Subject: [PATCH 08/48] Revert "feat: update instance calc" This reverts commit f479d2d8993a91c9989b339a28c90a32e8a12276. --- core/prometheus/component/StreamScraper.h | 11 ++--------- core/prometheus/schedulers/ScrapeScheduler.cpp | 2 +- .../schedulers/TargetSubscriberScheduler.cpp | 3 +++ 3 files changed, 6 insertions(+), 10 deletions(-) diff --git a/core/prometheus/component/StreamScraper.h b/core/prometheus/component/StreamScraper.h index b40202ee3e..3f3b9ab45d 100644 --- a/core/prometheus/component/StreamScraper.h +++ b/core/prometheus/component/StreamScraper.h @@ -4,10 +4,8 @@ #include #include "Labels.h" -#include "StringTools.h" #include "models/PipelineEventGroup.h" #include "pipeline/queue/QueueKey.h" -#include "prometheus/Constants.h" #ifdef APSARA_UNIT_TEST_MAIN #include @@ -18,16 +16,11 @@ namespace logtail::prom { class StreamScraper { public: - StreamScraper(Labels labels, QueueKey queueKey, size_t inputIndex, const std::string& host, int32_t port) + StreamScraper(Labels labels, QueueKey queueKey, size_t inputIndex) : mEventGroup(PipelineEventGroup(std::make_shared())), mQueueKey(queueKey), mInputIndex(inputIndex), - mTargetLabels(std::move(labels)) { - auto instance = host + ":" + ToString(port); - if (mTargetLabels.Get(prometheus::INSTANCE).empty()) { - mTargetLabels.Set(prometheus::INSTANCE, instance); - } - } + mTargetLabels(std::move(labels)) {} static size_t MetricWriteCallback(char* buffer, size_t size, size_t nmemb, void* data); void FlushCache(); diff --git a/core/prometheus/schedulers/ScrapeScheduler.cpp b/core/prometheus/schedulers/ScrapeScheduler.cpp index ef4d344938..545a18ce6c 100644 --- a/core/prometheus/schedulers/ScrapeScheduler.cpp +++ b/core/prometheus/schedulers/ScrapeScheduler.cpp @@ -45,7 +45,7 @@ ScrapeScheduler::ScrapeScheduler(std::shared_ptr scrapeConfigPtr, Labels labels, QueueKey queueKey, size_t inputIndex) - : mPromStreamScraper(labels, queueKey, inputIndex, host, port), + : mPromStreamScraper(labels, queueKey, inputIndex), mScrapeConfigPtr(std::move(scrapeConfigPtr)), mHost(std::move(host)), mPort(port), diff --git a/core/prometheus/schedulers/TargetSubscriberScheduler.cpp b/core/prometheus/schedulers/TargetSubscriberScheduler.cpp index 5ced761b03..e211627e3a 100644 --- a/core/prometheus/schedulers/TargetSubscriberScheduler.cpp +++ b/core/prometheus/schedulers/TargetSubscriberScheduler.cpp @@ -206,6 +206,9 @@ TargetSubscriberScheduler::BuildScrapeSchedulerSet(std::vector& targetGr } string address = resultLabel.Get(prometheus::ADDRESS_LABEL_NAME); + if (resultLabel.Get(prometheus::INSTANCE).empty()) { + resultLabel.Set(prometheus::INSTANCE, address); + } auto m = address.find(':'); if (m == string::npos) { From 32f3a57982b5cef379ebc5a3142c8fb92f79e4cb Mon Sep 17 00:00:00 2001 From: liqiang Date: Thu, 9 Jan 2025 02:34:52 +0000 Subject: [PATCH 09/48] chore: update instance --- core/prometheus/schedulers/ScrapeScheduler.cpp | 3 +++ core/prometheus/schedulers/TargetSubscriberScheduler.cpp | 3 --- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/core/prometheus/schedulers/ScrapeScheduler.cpp b/core/prometheus/schedulers/ScrapeScheduler.cpp index d30ab84689..005a99a78a 100644 --- a/core/prometheus/schedulers/ScrapeScheduler.cpp +++ b/core/prometheus/schedulers/ScrapeScheduler.cpp @@ -57,6 +57,9 @@ ScrapeScheduler::ScrapeScheduler(std::shared_ptr scrapeConfigPtr, hash << std::setw(16) << std::setfill('0') << std::hex << labels.Hash(); mHash = mScrapeConfigPtr->mJobName + mInstance + hash.str(); mInterval = mScrapeConfigPtr->mScrapeIntervalSeconds; + if (mTargetLabels.Get(prometheus::INSTANCE).empty()) { + mTargetLabels.Set(prometheus::INSTANCE, mInstance); + } } void ScrapeScheduler::OnMetricResult(HttpResponse& response, uint64_t) { diff --git a/core/prometheus/schedulers/TargetSubscriberScheduler.cpp b/core/prometheus/schedulers/TargetSubscriberScheduler.cpp index e211627e3a..5ced761b03 100644 --- a/core/prometheus/schedulers/TargetSubscriberScheduler.cpp +++ b/core/prometheus/schedulers/TargetSubscriberScheduler.cpp @@ -206,9 +206,6 @@ TargetSubscriberScheduler::BuildScrapeSchedulerSet(std::vector& targetGr } string address = resultLabel.Get(prometheus::ADDRESS_LABEL_NAME); - if (resultLabel.Get(prometheus::INSTANCE).empty()) { - resultLabel.Set(prometheus::INSTANCE, address); - } auto m = address.find(':'); if (m == string::npos) { From d811148bb10bce8dfab251237facb1b9a3fca139 Mon Sep 17 00:00:00 2001 From: liqiang Date: Thu, 9 Jan 2025 02:50:24 +0000 Subject: [PATCH 10/48] chore: update --- .../prometheus/schedulers/ScrapeScheduler.cpp | 3 --- .../schedulers/TargetSubscriberScheduler.cpp | 27 +++---------------- 2 files changed, 4 insertions(+), 26 deletions(-) diff --git a/core/prometheus/schedulers/ScrapeScheduler.cpp b/core/prometheus/schedulers/ScrapeScheduler.cpp index 005a99a78a..d30ab84689 100644 --- a/core/prometheus/schedulers/ScrapeScheduler.cpp +++ b/core/prometheus/schedulers/ScrapeScheduler.cpp @@ -57,9 +57,6 @@ ScrapeScheduler::ScrapeScheduler(std::shared_ptr scrapeConfigPtr, hash << std::setw(16) << std::setfill('0') << std::hex << labels.Hash(); mHash = mScrapeConfigPtr->mJobName + mInstance + hash.str(); mInterval = mScrapeConfigPtr->mScrapeIntervalSeconds; - if (mTargetLabels.Get(prometheus::INSTANCE).empty()) { - mTargetLabels.Set(prometheus::INSTANCE, mInstance); - } } void ScrapeScheduler::OnMetricResult(HttpResponse& response, uint64_t) { diff --git a/core/prometheus/schedulers/TargetSubscriberScheduler.cpp b/core/prometheus/schedulers/TargetSubscriberScheduler.cpp index 5ced761b03..f3f7647adc 100644 --- a/core/prometheus/schedulers/TargetSubscriberScheduler.cpp +++ b/core/prometheus/schedulers/TargetSubscriberScheduler.cpp @@ -30,7 +30,6 @@ #include "logger/Logger.h" #include "monitor/metric_constants/MetricConstants.h" #include "prometheus/Constants.h" -#include "prometheus/PrometheusInputRunner.h" #include "prometheus/Utils.h" #include "prometheus/async/PromFuture.h" #include "prometheus/async/PromHttpRequest.h" @@ -206,6 +205,9 @@ TargetSubscriberScheduler::BuildScrapeSchedulerSet(std::vector& targetGr } string address = resultLabel.Get(prometheus::ADDRESS_LABEL_NAME); + if (resultLabel.Get(prometheus::INSTANCE).empty()) { + resultLabel.Set(prometheus::INSTANCE, address); + } auto m = address.find(':'); if (m == string::npos) { @@ -294,7 +296,6 @@ TargetSubscriberScheduler::BuildSubscriberTimerEvent(std::chrono::steady_clock:: if (!mETag.empty()) { httpHeader[prometheus::IF_NONE_MATCH] = mETag; } - auto body = TargetsInfoToString(); auto request = std::make_unique(HTTP_GET, false, mServiceHost, @@ -302,7 +303,7 @@ TargetSubscriberScheduler::BuildSubscriberTimerEvent(std::chrono::steady_clock:: "/jobs/" + URLEncode(GetId()) + "/targets", "collector_id=" + mPodName, httpHeader, - body, + "", HttpResponse(), prometheus::RefeshIntervalSeconds, 1, @@ -312,26 +313,6 @@ TargetSubscriberScheduler::BuildSubscriberTimerEvent(std::chrono::steady_clock:: return timerEvent; } -string TargetSubscriberScheduler::TargetsInfoToString() const { - Json::Value root; - auto agentInfo = PrometheusInputRunner::GetInstance()->GetAgentInfo(); - root[prometheus::AGENT_INFO][prometheus::CPU_USAGE] = agentInfo.mCpuUsage; - root[prometheus::AGENT_INFO][prometheus::CPU_LIMIT] = agentInfo.mCpuLimit; - root[prometheus::AGENT_INFO][prometheus::MEM_USAGE] = agentInfo.mMemUsage; - root[prometheus::AGENT_INFO][prometheus::MEM_LIMIT] = agentInfo.mMemLimit; - root[prometheus::AGENT_INFO][prometheus::HEALTH] = agentInfo.mHealth; - { - ReadLock lock(mRWLock); - for (const auto& [k, v] : mScrapeSchedulerMap) { - Json::Value targetInfo; - targetInfo[prometheus::HASH] = v->GetId(); - targetInfo[prometheus::SIZE] = v->GetLastScrapeSize(); - root[prometheus::TARGETS_INFO].append(targetInfo); - } - } - return root.toStyledString(); -} - void TargetSubscriberScheduler::CancelAllScrapeScheduler() { ReadLock lock(mRWLock); for (const auto& [k, v] : mScrapeSchedulerMap) { From c74b96c28464541f865f8ac9ff1f4e3df13aaa22 Mon Sep 17 00:00:00 2001 From: liqiang Date: Thu, 9 Jan 2025 03:50:07 +0000 Subject: [PATCH 11/48] feat: calc hash --- core/prometheus/Constants.h | 2 + .../prometheus/schedulers/ScrapeScheduler.cpp | 10 +- core/prometheus/schedulers/ScrapeScheduler.h | 3 +- .../schedulers/TargetSubscriberScheduler.cpp | 64 +++++-- .../schedulers/TargetSubscriberScheduler.h | 11 +- .../TargetSubscriberSchedulerUnittest.cpp | 164 +++++++++++++++--- 6 files changed, 206 insertions(+), 48 deletions(-) diff --git a/core/prometheus/Constants.h b/core/prometheus/Constants.h index f930b77e1a..b842797c4d 100644 --- a/core/prometheus/Constants.h +++ b/core/prometheus/Constants.h @@ -100,6 +100,8 @@ const char* const SCHEME_LABEL_NAME = "__scheme__"; const char* const METRICS_PATH_LABEL_NAME = "__metrics_path__"; const char* const PARAM_LABEL_NAME = "__param_"; const char* const LABELS = "labels"; +const char* const TARGET_HASH = "hash"; +const char* const TARGET_IMMEDIATE = "target_immediate"; // auto metrics const char* const SCRAPE_STATE = "scrape_state"; diff --git a/core/prometheus/schedulers/ScrapeScheduler.cpp b/core/prometheus/schedulers/ScrapeScheduler.cpp index d30ab84689..c1bd0104f3 100644 --- a/core/prometheus/schedulers/ScrapeScheduler.cpp +++ b/core/prometheus/schedulers/ScrapeScheduler.cpp @@ -40,12 +40,14 @@ using namespace std; namespace logtail { ScrapeScheduler::ScrapeScheduler(std::shared_ptr scrapeConfigPtr, - std::string host, + string host, int32_t port, Labels labels, QueueKey queueKey, - size_t inputIndex) + size_t inputIndex, + string rawHash) : mScrapeConfigPtr(std::move(scrapeConfigPtr)), + mHash(rawHash), mHost(std::move(host)), mPort(port), mQueueKey(queueKey), @@ -53,9 +55,7 @@ ScrapeScheduler::ScrapeScheduler(std::shared_ptr scrapeConfigPtr, mTargetLabels(labels), mScrapeResponseSizeBytes(0) { mInstance = mHost + ":" + ToString(mPort); - std::ostringstream hash; - hash << std::setw(16) << std::setfill('0') << std::hex << labels.Hash(); - mHash = mScrapeConfigPtr->mJobName + mInstance + hash.str(); + mInterval = mScrapeConfigPtr->mScrapeIntervalSeconds; } diff --git a/core/prometheus/schedulers/ScrapeScheduler.h b/core/prometheus/schedulers/ScrapeScheduler.h index 0539ea58f2..d118a7ff7e 100644 --- a/core/prometheus/schedulers/ScrapeScheduler.h +++ b/core/prometheus/schedulers/ScrapeScheduler.h @@ -40,7 +40,8 @@ class ScrapeScheduler : public BaseScheduler { int32_t port, Labels labels, QueueKey queueKey, - size_t inputIndex); + size_t inputIndex, + std::string rawHash); ScrapeScheduler(const ScrapeScheduler&) = delete; ~ScrapeScheduler() override = default; diff --git a/core/prometheus/schedulers/TargetSubscriberScheduler.cpp b/core/prometheus/schedulers/TargetSubscriberScheduler.cpp index f3f7647adc..bb0c6ca195 100644 --- a/core/prometheus/schedulers/TargetSubscriberScheduler.cpp +++ b/core/prometheus/schedulers/TargetSubscriberScheduler.cpp @@ -30,6 +30,7 @@ #include "logger/Logger.h" #include "monitor/metric_constants/MetricConstants.h" #include "prometheus/Constants.h" +#include "prometheus/PrometheusInputRunner.h" #include "prometheus/Utils.h" #include "prometheus/async/PromFuture.h" #include "prometheus/async/PromHttpRequest.h" @@ -74,7 +75,7 @@ void TargetSubscriberScheduler::OnSubscription(HttpResponse& response, uint64_t mETag = response.GetHeader().at(prometheus::ETAG); } const string& content = *response.GetBody(); - vector targetGroup; + vector targetGroup; if (!ParseScrapeSchedulerGroup(content, targetGroup)) { return; } @@ -132,7 +133,7 @@ void TargetSubscriberScheduler::UpdateScrapeScheduler( } bool TargetSubscriberScheduler::ParseScrapeSchedulerGroup(const std::string& content, - std::vector& scrapeSchedulerGroup) { + std::vector& scrapeSchedulerGroup) { string errs; Json::Value root; if (!ParseJsonTable(content, root, errs) || !root.isArray()) { @@ -165,8 +166,19 @@ bool TargetSubscriberScheduler::ParseScrapeSchedulerGroup(const std::string& con if (targets.empty()) { continue; } + PromTargetInfo targetInfo; // Parse labels Labels labels; + if (element.isMember(prometheus::LABELS) && element[prometheus::LABELS].isObject()) { + for (const string& labelKey : element[prometheus::LABELS].getMemberNames()) { + labels.Set(labelKey, element[prometheus::LABELS][labelKey].asString()); + } + } + std::ostringstream rawHashStream; + rawHashStream << std::setw(16) << std::setfill('0') << std::hex << labels.Hash(); + string rawAddress = labels.Get(prometheus::ADDRESS_LABEL_NAME); + targetInfo.mHash = mScrapeConfigPtr->mJobName + rawAddress + rawHashStream.str(); + labels.Set(prometheus::JOB, mJobName); labels.Set(prometheus::ADDRESS_LABEL_NAME, targets[0]); labels.Set(prometheus::SCHEME_LABEL_NAME, mScrapeConfigPtr->mScheme); @@ -179,22 +191,27 @@ bool TargetSubscriberScheduler::ParseScrapeSchedulerGroup(const std::string& con } } - if (element.isMember(prometheus::LABELS) && element[prometheus::LABELS].isObject()) { - for (const string& labelKey : element[prometheus::LABELS].getMemberNames()) { - labels.Set(labelKey, element[prometheus::LABELS][labelKey].asString()); - } + targetInfo.mLabels = labels; + + if (element.isMember(prometheus::TARGET_HASH) && element[prometheus::TARGET_HASH].isString()) { + targetInfo.mHash = element[prometheus::TARGET_HASH].asString(); } - scrapeSchedulerGroup.push_back(labels); + + if (element.isMember(prometheus::TARGET_IMMEDIATE) && element[prometheus::TARGET_IMMEDIATE].isBool()) { + targetInfo.mImmediate = element[prometheus::TARGET_IMMEDIATE].asBool(); + } + + scrapeSchedulerGroup.push_back(targetInfo); } return true; } std::unordered_map> -TargetSubscriberScheduler::BuildScrapeSchedulerSet(std::vector& targetGroups) { +TargetSubscriberScheduler::BuildScrapeSchedulerSet(std::vector& targetGroups) { std::unordered_map> scrapeSchedulerMap; - for (const auto& labels : targetGroups) { + for (const auto& targetInfo : targetGroups) { // Relabel Config - Labels resultLabel = labels; + Labels resultLabel = targetInfo.mLabels; vector toDelete; if (!mScrapeConfigPtr->mRelabelConfigs.Process(resultLabel, toDelete)) { continue; @@ -221,8 +238,8 @@ TargetSubscriberScheduler::BuildScrapeSchedulerSet(std::vector& targetGr } string host = address.substr(0, m); - auto scrapeScheduler - = std::make_shared(mScrapeConfigPtr, host, port, resultLabel, mQueueKey, mInputIndex); + auto scrapeScheduler = std::make_shared( + mScrapeConfigPtr, host, port, resultLabel, mQueueKey, mInputIndex, targetInfo.mHash); scrapeScheduler->SetComponent(mTimer, mEventPool); @@ -296,6 +313,7 @@ TargetSubscriberScheduler::BuildSubscriberTimerEvent(std::chrono::steady_clock:: if (!mETag.empty()) { httpHeader[prometheus::IF_NONE_MATCH] = mETag; } + auto body = TargetsInfoToString(); auto request = std::make_unique(HTTP_GET, false, mServiceHost, @@ -303,7 +321,7 @@ TargetSubscriberScheduler::BuildSubscriberTimerEvent(std::chrono::steady_clock:: "/jobs/" + URLEncode(GetId()) + "/targets", "collector_id=" + mPodName, httpHeader, - "", + body, HttpResponse(), prometheus::RefeshIntervalSeconds, 1, @@ -313,6 +331,26 @@ TargetSubscriberScheduler::BuildSubscriberTimerEvent(std::chrono::steady_clock:: return timerEvent; } +string TargetSubscriberScheduler::TargetsInfoToString() const { + Json::Value root; + auto agentInfo = PrometheusInputRunner::GetInstance()->GetAgentInfo(); + root[prometheus::AGENT_INFO][prometheus::CPU_USAGE] = agentInfo.mCpuUsage; + root[prometheus::AGENT_INFO][prometheus::CPU_LIMIT] = agentInfo.mCpuLimit; + root[prometheus::AGENT_INFO][prometheus::MEM_USAGE] = agentInfo.mMemUsage; + root[prometheus::AGENT_INFO][prometheus::MEM_LIMIT] = agentInfo.mMemLimit; + root[prometheus::AGENT_INFO][prometheus::HEALTH] = agentInfo.mHealth; + { + ReadLock lock(mRWLock); + for (const auto& [k, v] : mScrapeSchedulerMap) { + Json::Value targetInfo; + targetInfo[prometheus::HASH] = v->GetId(); + targetInfo[prometheus::SIZE] = v->GetLastScrapeSize(); + root[prometheus::TARGETS_INFO].append(targetInfo); + } + } + return root.toStyledString(); +} + void TargetSubscriberScheduler::CancelAllScrapeScheduler() { ReadLock lock(mRWLock); for (const auto& [k, v] : mScrapeSchedulerMap) { diff --git a/core/prometheus/schedulers/TargetSubscriberScheduler.h b/core/prometheus/schedulers/TargetSubscriberScheduler.h index f034bd3dad..ddd8dbbdb5 100644 --- a/core/prometheus/schedulers/TargetSubscriberScheduler.h +++ b/core/prometheus/schedulers/TargetSubscriberScheduler.h @@ -33,6 +33,13 @@ namespace logtail { +struct PromTargetInfo { + Labels mLabels; + std::string mTarget; + std::string mHash; + bool mImmediate = false; +}; + class TargetSubscriberScheduler : public BaseScheduler { public: TargetSubscriberScheduler(); @@ -63,10 +70,10 @@ class TargetSubscriberScheduler : public BaseScheduler { uint64_t mUnRegisterMs; private: - bool ParseScrapeSchedulerGroup(const std::string& content, std::vector& scrapeSchedulerGroup); + bool ParseScrapeSchedulerGroup(const std::string& content, std::vector& scrapeSchedulerGroup); std::unordered_map> - BuildScrapeSchedulerSet(std::vector& scrapeSchedulerGroup); + BuildScrapeSchedulerSet(std::vector& scrapeSchedulerGroup); std::unique_ptr BuildSubscriberTimerEvent(std::chrono::steady_clock::time_point execTime); std::string TargetsInfoToString() const; diff --git a/core/unittest/prometheus/TargetSubscriberSchedulerUnittest.cpp b/core/unittest/prometheus/TargetSubscriberSchedulerUnittest.cpp index a9db51e3b4..d856f525d1 100644 --- a/core/unittest/prometheus/TargetSubscriberSchedulerUnittest.cpp +++ b/core/unittest/prometheus/TargetSubscriberSchedulerUnittest.cpp @@ -48,7 +48,7 @@ class TargetSubscriberSchedulerUnittest : public ::testing::Test { "enable_http2": true, "follow_redirects": true, "honor_timestamps": false, - "job_name": "_kube-state-metrics", + "job_name": "loong-collector/demo-podmonitor-500/0", "kubernetes_sd_configs": [ { "enable_http2": true, @@ -81,34 +81,141 @@ class TargetSubscriberSchedulerUnittest : public ::testing::Test { *mHttpResponse.GetBody() = R"JSON([ { "targets": [ - "192.168.22.7:8080" + "10.0.2.81:8080" ], "labels": { - "__meta_kubernetes_pod_controller_kind": "ReplicaSet", - "__meta_kubernetes_pod_container_image": "registry-vpc.cn-hangzhou.aliyuncs.com/acs/kube-state-metrics:v2.3.0-a71f78c-aliyun", - "__meta_kubernetes_namespace": "arms-prom", - "__meta_kubernetes_pod_labelpresent_pod_template_hash": "true", - "__meta_kubernetes_pod_uid": "00d1897f-d442-47c4-8423-e9bf32dea173", + "__meta_kubernetes_pod_labelpresent_label_key_47": "true", + "__meta_kubernetes_pod_labelpresent_label_key_07": "true", + "__meta_kubernetes_pod_labelpresent_label_key_18": "true", + "__meta_kubernetes_pod_annotationpresent_prometheus_io_port": "true", + "__meta_kubernetes_pod_labelpresent_label_key_08": "true", + "__meta_kubernetes_pod_labelpresent_label_key_15": "true", + "__meta_kubernetes_pod_labelpresent_label_key_32": "true", + "__meta_kubernetes_pod_label_label_key_07": "label_value_07", + "__meta_kubernetes_pod_labelpresent_label_key_33": "true", + "__meta_kubernetes_pod_labelpresent_label_key_26": "true", + "__meta_kubernetes_pod_label_label_key_17": "label_value_17", + "__meta_kubernetes_pod_label_label_key_38": "label_value_38", + "__meta_kubernetes_pod_annotationpresent_prometheus_io_scrape": "true", "__meta_kubernetes_pod_container_init": "false", - "__meta_kubernetes_pod_container_port_protocol": "TCP", - "__meta_kubernetes_pod_host_ip": "192.168.21.234", - "__meta_kubernetes_pod_controller_name": "kube-state-metrics-64cf88c8f4", - "__meta_kubernetes_pod_annotation_k8s_aliyun_com_pod_ips": "192.168.22.7", + "__meta_kubernetes_pod_labelpresent_label_key_35": "true", + "__meta_kubernetes_pod_label_label_key_13": "label_value_13", + "__meta_kubernetes_pod_name": "demo-app-500-5c97455f77-brddj", + "__meta_kubernetes_pod_label_label_key_32": "label_value_32", + "__meta_kubernetes_pod_labelpresent_label_key_01": "true", + "__meta_kubernetes_pod_label_label_key_12": "label_value_12", + "__meta_kubernetes_pod_label_label_key_11": "label_value_11", + "__meta_kubernetes_pod_label_label_key_04": "label_value_04", + "__meta_kubernetes_pod_labelpresent_label_key_42": "true", + "__meta_kubernetes_pod_uid": "c640e01c-0c1e-487e-9d1b-a743b88bb01a", + "__meta_kubernetes_pod_labelpresent_label_key_10": "true", + "__meta_kubernetes_pod_label_label_key_36": "label_value_36", + "__meta_kubernetes_pod_label_label_key_43": "label_value_43", + "__meta_kubernetes_pod_labelpresent_label_key_24": "true", + "__meta_kubernetes_pod_labelpresent_label_key_04": "true", + "__meta_kubernetes_pod_label_label_key_01": "label_value_01", + "__meta_kubernetes_pod_label_label_key_09": "label_value_09", + "__meta_kubernetes_pod_label_label_key_00": "label_value_00", + "__meta_kubernetes_pod_labelpresent_label_key_34": "true", + "__meta_kubernetes_pod_labelpresent_pod_template_hash": "true", + "__meta_kubernetes_pod_labelpresent_label_key_41": "true", + "__meta_kubernetes_pod_label_label_key_19": "label_value_19", + "__meta_kubernetes_pod_label_label_key_10": "label_value_10", + "__meta_kubernetes_pod_label_label_key_35": "label_value_35", + "__meta_kubernetes_pod_labelpresent_app": "true", + "__meta_kubernetes_pod_controller_kind": "ReplicaSet", + "__meta_kubernetes_pod_label_label_key_49": "label_value_49", + "__meta_kubernetes_pod_labelpresent_label_key_05": "true", + "__meta_kubernetes_pod_label_label_key_25": "label_value_25", + "__meta_kubernetes_pod_labelpresent_label_key_49": "true", + "__meta_kubernetes_pod_labelpresent_label_key_06": "true", + "__meta_kubernetes_pod_label_label_key_23": "label_value_23", + "__meta_kubernetes_pod_labelpresent_label_key_27": "true", + "__meta_kubernetes_pod_labelpresent_label_key_25": "true", + "__meta_kubernetes_pod_labelpresent_label_key_38": "true", + "__meta_kubernetes_pod_label_label_key_02": "label_value_02", + "__meta_kubernetes_pod_label_label_key_22": "label_value_22", + "__meta_kubernetes_pod_annotation_prometheus_io_port": "8080", + "__meta_kubernetes_pod_phase": "Running", + "__meta_kubernetes_pod_label_label_key_27": "label_value_27", + "__meta_kubernetes_pod_labelpresent_label_key_28": "true", + "__meta_kubernetes_pod_label_label_key_41": "label_value_41", + "__meta_kubernetes_pod_label_label_key_24": "label_value_24", + "__meta_kubernetes_pod_label_label_key_20": "label_value_20", + "__meta_kubernetes_pod_label_pod_template_hash": "5c97455f77", + "__meta_kubernetes_pod_label_label_key_33": "label_value_33", + "__meta_kubernetes_pod_labelpresent_label_key_40": "true", + "__meta_kubernetes_pod_labelpresent_label_key_03": "true", + "__meta_kubernetes_pod_label_label_key_08": "label_value_08", + "__meta_kubernetes_pod_container_port_number": "8080", + "__meta_kubernetes_pod_labelpresent_label_key_31": "true", + "__meta_kubernetes_pod_label_label_key_48": "label_value_48", + "__meta_kubernetes_pod_label_label_key_40": "label_value_40", + "__meta_kubernetes_pod_label_label_key_28": "label_value_28", + "__meta_kubernetes_pod_labelpresent_label_key_23": "true", + "__meta_kubernetes_pod_labelpresent_label_key_48": "true", + "__meta_kubernetes_pod_labelpresent_label_key_43": "true", + "__meta_kubernetes_pod_labelpresent_label_key_46": "true", + "__meta_kubernetes_pod_label_label_key_46": "label_value_46", + "__meta_kubernetes_pod_controller_name": "demo-app-500-5c97455f77", + "__meta_kubernetes_pod_host_ip": "10.0.4.54", + "__meta_kubernetes_pod_labelpresent_label_key_44": "true", + "__meta_kubernetes_pod_label_label_key_44": "label_value_44", + "__meta_kubernetes_pod_node_name": "cn-heyuan.10.0.4.54", + "__address__": "10.0.2.81:8080", + "__meta_kubernetes_pod_container_name": "demo-app-500", + "__meta_kubernetes_pod_labelpresent_label_key_30": "true", + "__meta_kubernetes_pod_labelpresent_label_key_17": "true", + "__meta_kubernetes_pod_label_label_key_29": "label_value_29", + "__meta_kubernetes_pod_labelpresent_label_key_02": "true", + "__meta_kubernetes_pod_label_label_key_39": "label_value_39", + "__meta_kubernetes_pod_label_label_key_21": "label_value_21", + "__meta_kubernetes_pod_label_label_key_37": "label_value_37", + "__meta_kubernetes_pod_labelpresent_label_key_37": "true", + "__meta_kubernetes_pod_labelpresent_label_key_45": "true", + "__meta_kubernetes_pod_label_label_key_45": "label_value_45", + "__meta_kubernetes_pod_label_label_key_26": "label_value_26", + "__meta_kubernetes_pod_labelpresent_label_key_12": "true", + "__meta_kubernetes_pod_labelpresent_label_key_11": "true", + "__meta_kubernetes_pod_label_app": "demo-app-500", + "__meta_kubernetes_pod_labelpresent_label_key_21": "true", + "__meta_kubernetes_pod_labelpresent_label_key_13": "true", + "__meta_kubernetes_pod_ip": "10.0.2.81", + "__meta_kubernetes_pod_label_label_key_42": "label_value_42", + "__meta_kubernetes_pod_annotation_k8s_aliyun_com_pod_ips": "10.0.2.81", + "__meta_kubernetes_pod_label_label_key_34": "label_value_34", + "__meta_kubernetes_pod_label_label_key_06": "label_value_06", + "__meta_kubernetes_pod_label_label_key_31": "label_value_31", + "__meta_kubernetes_pod_annotation_prometheus_io_path": "/metrics", + "__meta_kubernetes_pod_container_id": "containerd://788da13840e1e8711f71b42015cdaabf590b10e8658524a9e3de910a4f373532", "__meta_kubernetes_pod_ready": "true", - "__meta_kubernetes_pod_node_name": "cn-hangzhou.192.168.21.234", + "__meta_kubernetes_pod_labelpresent_label_key_00": "true", + "__meta_kubernetes_pod_label_label_key_03": "label_value_03", + "__meta_kubernetes_pod_labelpresent_label_key_16": "true", + "__meta_kubernetes_pod_container_port_name": "", + "__meta_kubernetes_pod_label_label_key_16": "label_value_16", + "__meta_kubernetes_pod_label_label_key_15": "label_value_15", + "__meta_kubernetes_pod_container_image": "arms-deploy-registry.cn-hangzhou.cr.aliyuncs.com/arms-deploy-repo/prometheus-sample-app:latest", + "__meta_kubernetes_pod_annotation_prometheus_io_scrape": "true", + "__meta_kubernetes_pod_labelpresent_label_key_20": "true", + "__meta_kubernetes_pod_labelpresent_label_key_19": "true", + "__meta_kubernetes_pod_label_label_key_18": "label_value_18", + "__meta_kubernetes_pod_annotationpresent_prometheus_io_path": "true", "__meta_kubernetes_pod_annotationpresent_k8s_aliyun_com_pod_ips": "true", - "__address__": "192.168.22.7:8080", - "__meta_kubernetes_pod_labelpresent_k8s_app": "true", - "__meta_kubernetes_pod_label_k8s_app": "kube-state-metrics", - "__meta_kubernetes_pod_container_id": "containerd://57c4dfd8d9ea021defb248dfbc5cc3bd3758072c4529be351b8cc6838bdff02f", - "__meta_kubernetes_pod_container_port_number": "8080", - "__meta_kubernetes_pod_ip": "192.168.22.7", - "__meta_kubernetes_pod_phase": "Running", - "__meta_kubernetes_pod_container_name": "kube-state-metrics", - "__meta_kubernetes_pod_container_port_name": "http-metrics", - "__meta_kubernetes_pod_label_pod_template_hash": "64cf88c8f4", - "__meta_kubernetes_pod_name": "kube-state-metrics-64cf88c8f4-jtn6v" - } + "__meta_kubernetes_pod_labelpresent_label_key_39": "true", + "__meta_kubernetes_pod_label_label_key_30": "label_value_30", + "__meta_kubernetes_pod_labelpresent_label_key_14": "true", + "__meta_kubernetes_pod_label_label_key_05": "label_value_05", + "__meta_kubernetes_pod_labelpresent_label_key_22": "true", + "__meta_kubernetes_pod_container_port_protocol": "TCP", + "__meta_kubernetes_pod_labelpresent_label_key_09": "true", + "__meta_kubernetes_pod_labelpresent_label_key_36": "true", + "__meta_kubernetes_pod_label_label_key_47": "label_value_47", + "__meta_kubernetes_pod_label_label_key_14": "label_value_14", + "__meta_kubernetes_namespace": "default", + "__meta_kubernetes_pod_labelpresent_label_key_29": "true" + }, + "Load": 425 }, { "targets": [ @@ -165,7 +272,7 @@ void TargetSubscriberSchedulerUnittest::OnInitScrapeJobEvent() { APSARA_TEST_TRUE(targetSubscriber->Init(mConfig["ScrapeConfig"])); APSARA_TEST_NOT_EQUAL(targetSubscriber->mScrapeConfigPtr.get(), nullptr); - APSARA_TEST_EQUAL(targetSubscriber->mJobName, "_kube-state-metrics"); + APSARA_TEST_EQUAL(targetSubscriber->mJobName, "loong-collector/demo-podmonitor-500/0"); } void TargetSubscriberSchedulerUnittest::TestProcess() { @@ -189,7 +296,7 @@ void TargetSubscriberSchedulerUnittest::TestParseTargetGroups() { std::shared_ptr targetSubscriber = std::make_shared(); APSARA_TEST_TRUE(targetSubscriber->Init(mConfig["ScrapeConfig"])); - std::vector newScrapeSchedulerSet; + std::vector newScrapeSchedulerSet; APSARA_TEST_TRUE( targetSubscriber->ParseScrapeSchedulerGroup(*mHttpResponse.GetBody(), newScrapeSchedulerSet)); APSARA_TEST_EQUAL(3UL, newScrapeSchedulerSet.size()); @@ -199,7 +306,7 @@ void TargetSubscriberSchedulerUnittest::TestBuildScrapeSchedulerSet() { // prepare data std::shared_ptr targetSubscriber = std::make_shared(); APSARA_TEST_TRUE(targetSubscriber->Init(mConfig["ScrapeConfig"])); - std::vector newScrapeSchedulerSet; + std::vector newScrapeSchedulerSet; APSARA_TEST_TRUE( targetSubscriber->ParseScrapeSchedulerGroup(*mHttpResponse.GetBody(), newScrapeSchedulerSet)); APSARA_TEST_EQUAL(3UL, newScrapeSchedulerSet.size()); @@ -210,11 +317,14 @@ void TargetSubscriberSchedulerUnittest::TestBuildScrapeSchedulerSet() { startTimeList.reserve(result.size()); for (auto& it : result) { startTimeList.emplace_back(it.second->GetId(), it.second->GetNextExecTime()); + std::cout << it.second->GetId() << std::endl; } APSARA_TEST_EQUAL(3UL, startTimeList.size()); APSARA_TEST_NOT_EQUAL(startTimeList[0].second, startTimeList[1].second); APSARA_TEST_NOT_EQUAL(startTimeList[1].second, startTimeList[2].second); APSARA_TEST_NOT_EQUAL(startTimeList[0].second, startTimeList[2].second); + + APSARA_TEST_EQUAL(1UL, result.count("loong-collector/demo-podmonitor-500/010.0.2.81:808093796c8e4493906d")); } void TargetSubscriberSchedulerUnittest::TestTargetsInfoToString() { From 5ca13cb13f17302f6b67ed163907c90493af9517 Mon Sep 17 00:00:00 2001 From: liqiang Date: Thu, 9 Jan 2025 05:43:01 +0000 Subject: [PATCH 12/48] chore: update --- .../prometheus/schedulers/ScrapeScheduler.cpp | 24 +++++++++---------- core/prometheus/schedulers/ScrapeScheduler.h | 18 ++++++++------ .../schedulers/TargetSubscriberScheduler.cpp | 10 ++++---- .../schedulers/TargetSubscriberScheduler.h | 11 +-------- .../TargetSubscriberSchedulerUnittest.cpp | 1 - 5 files changed, 30 insertions(+), 34 deletions(-) diff --git a/core/prometheus/schedulers/ScrapeScheduler.cpp b/core/prometheus/schedulers/ScrapeScheduler.cpp index c1bd0104f3..950ebccd04 100644 --- a/core/prometheus/schedulers/ScrapeScheduler.cpp +++ b/core/prometheus/schedulers/ScrapeScheduler.cpp @@ -22,7 +22,6 @@ #include #include -#include "common/StringTools.h" #include "common/TimeUtil.h" #include "common/http/Constant.h" #include "common/timer/HttpRequestTimerEvent.h" @@ -42,20 +41,16 @@ namespace logtail { ScrapeScheduler::ScrapeScheduler(std::shared_ptr scrapeConfigPtr, string host, int32_t port, - Labels labels, QueueKey queueKey, size_t inputIndex, - string rawHash) + const PromTargetInfo& targetInfo) : mScrapeConfigPtr(std::move(scrapeConfigPtr)), - mHash(rawHash), mHost(std::move(host)), mPort(port), + mTargetInfo(targetInfo), mQueueKey(queueKey), mInputIndex(inputIndex), - mTargetLabels(labels), mScrapeResponseSizeBytes(0) { - mInstance = mHost + ":" + ToString(mPort); - mInterval = mScrapeConfigPtr->mScrapeIntervalSeconds; } @@ -85,8 +80,8 @@ void ScrapeScheduler::OnMetricResult(HttpResponse& response, uint64_t) { if (response.GetStatusCode() != 200) { LOG_WARNING(sLogger, - ("scrape failed, status code", - response.GetStatusCode())("target", mHash)("curl msg", response.GetNetworkStatus().mMessage)); + ("scrape failed, status code", response.GetStatusCode())("target", mTargetInfo.mHash)( + "curl msg", response.GetNetworkStatus().mMessage)); } auto scrapeDurationSeconds = scrapeDurationMilliSeconds * sRate; @@ -103,7 +98,11 @@ void ScrapeScheduler::OnMetricResult(HttpResponse& response, uint64_t) { string ScrapeScheduler::GetId() const { - return mHash; + return mTargetInfo.mHash; +} + +string ScrapeScheduler::GetHashForOperator() const { + return mTargetInfo.mHashForOperator; } void ScrapeScheduler::SetComponent(shared_ptr timer, EventPool* eventPool) { @@ -175,7 +174,8 @@ std::unique_ptr ScrapeScheduler::BuildScrapeTimerEvent(std::chrono:: mScrapeConfigPtr->mRequestHeaders, "", HttpResponse( - new prom::StreamScraper(mTargetLabels, mQueueKey, mInputIndex, mHash, mEventPool, mLatestScrapeTime), + new prom::StreamScraper( + mTargetInfo.mLabels, mQueueKey, mInputIndex, mTargetInfo.mHash, mEventPool, mLatestScrapeTime), [](void* p) { delete static_cast(p); }, prom::StreamScraper::MetricWriteCallback), mScrapeConfigPtr->mScrapeTimeoutSeconds, @@ -205,7 +205,7 @@ void ScrapeScheduler::Cancel() { void ScrapeScheduler::InitSelfMonitor(const MetricLabels& defaultLabels) { mSelfMonitor = std::make_shared(); MetricLabels labels = defaultLabels; - labels.emplace_back(METRIC_LABEL_KEY_INSTANCE, mInstance); + labels.emplace_back(METRIC_LABEL_KEY_INSTANCE, mTargetInfo.mInstance); static const std::unordered_map sScrapeMetricKeys = {{METRIC_PLUGIN_OUT_EVENTS_TOTAL, MetricType::METRIC_TYPE_COUNTER}, diff --git a/core/prometheus/schedulers/ScrapeScheduler.h b/core/prometheus/schedulers/ScrapeScheduler.h index d118a7ff7e..8b5081cb07 100644 --- a/core/prometheus/schedulers/ScrapeScheduler.h +++ b/core/prometheus/schedulers/ScrapeScheduler.h @@ -24,7 +24,6 @@ #include "monitor/metric_models/MetricTypes.h" #include "pipeline/queue/QueueKey.h" #include "prometheus/PromSelfMonitor.h" -#include "prometheus/component/StreamScraper.h" #include "prometheus/schedulers/ScrapeConfig.h" #ifdef APSARA_UNIT_TEST_MAIN @@ -33,21 +32,29 @@ namespace logtail { +struct PromTargetInfo { + Labels mLabels; + std::string mInstance; + std::string mHash; + std::string mHashForOperator; + bool mImmediate = false; +}; + class ScrapeScheduler : public BaseScheduler { public: ScrapeScheduler(std::shared_ptr scrapeConfigPtr, std::string host, int32_t port, - Labels labels, QueueKey queueKey, size_t inputIndex, - std::string rawHash); + const PromTargetInfo& targetInfo); ScrapeScheduler(const ScrapeScheduler&) = delete; ~ScrapeScheduler() override = default; void OnMetricResult(HttpResponse&, uint64_t timestampMilliSec); std::string GetId() const; + std::string GetHashForOperator() const; void SetComponent(std::shared_ptr timer, EventPool* eventPool); uint64_t GetLastScrapeSize() const { return mScrapeResponseSizeBytes; } @@ -61,17 +68,14 @@ class ScrapeScheduler : public BaseScheduler { std::unique_ptr BuildScrapeTimerEvent(std::chrono::steady_clock::time_point execTime); std::shared_ptr mScrapeConfigPtr; - std::string mHash; std::string mHost; int32_t mPort; - std::string mInstance; + PromTargetInfo mTargetInfo; // pipeline QueueKey mQueueKey; size_t mInputIndex; - Labels mTargetLabels; - // auto metrics std::atomic_uint64_t mScrapeResponseSizeBytes; diff --git a/core/prometheus/schedulers/TargetSubscriberScheduler.cpp b/core/prometheus/schedulers/TargetSubscriberScheduler.cpp index bb0c6ca195..1c985208ed 100644 --- a/core/prometheus/schedulers/TargetSubscriberScheduler.cpp +++ b/core/prometheus/schedulers/TargetSubscriberScheduler.cpp @@ -178,6 +178,8 @@ bool TargetSubscriberScheduler::ParseScrapeSchedulerGroup(const std::string& con rawHashStream << std::setw(16) << std::setfill('0') << std::hex << labels.Hash(); string rawAddress = labels.Get(prometheus::ADDRESS_LABEL_NAME); targetInfo.mHash = mScrapeConfigPtr->mJobName + rawAddress + rawHashStream.str(); + targetInfo.mHashForOperator = targetInfo.mHash; + targetInfo.mInstance = targets[0]; labels.Set(prometheus::JOB, mJobName); labels.Set(prometheus::ADDRESS_LABEL_NAME, targets[0]); @@ -194,7 +196,7 @@ bool TargetSubscriberScheduler::ParseScrapeSchedulerGroup(const std::string& con targetInfo.mLabels = labels; if (element.isMember(prometheus::TARGET_HASH) && element[prometheus::TARGET_HASH].isString()) { - targetInfo.mHash = element[prometheus::TARGET_HASH].asString(); + targetInfo.mHashForOperator = element[prometheus::TARGET_HASH].asString(); } if (element.isMember(prometheus::TARGET_IMMEDIATE) && element[prometheus::TARGET_IMMEDIATE].isBool()) { @@ -238,8 +240,8 @@ TargetSubscriberScheduler::BuildScrapeSchedulerSet(std::vector& } string host = address.substr(0, m); - auto scrapeScheduler = std::make_shared( - mScrapeConfigPtr, host, port, resultLabel, mQueueKey, mInputIndex, targetInfo.mHash); + auto scrapeScheduler + = std::make_shared(mScrapeConfigPtr, host, port, mQueueKey, mInputIndex, targetInfo); scrapeScheduler->SetComponent(mTimer, mEventPool); @@ -343,7 +345,7 @@ string TargetSubscriberScheduler::TargetsInfoToString() const { ReadLock lock(mRWLock); for (const auto& [k, v] : mScrapeSchedulerMap) { Json::Value targetInfo; - targetInfo[prometheus::HASH] = v->GetId(); + targetInfo[prometheus::HASH] = v->GetHashForOperator(); targetInfo[prometheus::SIZE] = v->GetLastScrapeSize(); root[prometheus::TARGETS_INFO].append(targetInfo); } diff --git a/core/prometheus/schedulers/TargetSubscriberScheduler.h b/core/prometheus/schedulers/TargetSubscriberScheduler.h index ddd8dbbdb5..65e1a9a3b6 100644 --- a/core/prometheus/schedulers/TargetSubscriberScheduler.h +++ b/core/prometheus/schedulers/TargetSubscriberScheduler.h @@ -17,14 +17,12 @@ #pragma once #include +#include #include #include -#include "json/json.h" - #include "common/http/HttpResponse.h" -#include "common/timer/Timer.h" #include "pipeline/queue/QueueKey.h" #include "prometheus/PromSelfMonitor.h" #include "prometheus/schedulers/BaseScheduler.h" @@ -33,13 +31,6 @@ namespace logtail { -struct PromTargetInfo { - Labels mLabels; - std::string mTarget; - std::string mHash; - bool mImmediate = false; -}; - class TargetSubscriberScheduler : public BaseScheduler { public: TargetSubscriberScheduler(); diff --git a/core/unittest/prometheus/TargetSubscriberSchedulerUnittest.cpp b/core/unittest/prometheus/TargetSubscriberSchedulerUnittest.cpp index d856f525d1..4e4d84b6f7 100644 --- a/core/unittest/prometheus/TargetSubscriberSchedulerUnittest.cpp +++ b/core/unittest/prometheus/TargetSubscriberSchedulerUnittest.cpp @@ -317,7 +317,6 @@ void TargetSubscriberSchedulerUnittest::TestBuildScrapeSchedulerSet() { startTimeList.reserve(result.size()); for (auto& it : result) { startTimeList.emplace_back(it.second->GetId(), it.second->GetNextExecTime()); - std::cout << it.second->GetId() << std::endl; } APSARA_TEST_EQUAL(3UL, startTimeList.size()); APSARA_TEST_NOT_EQUAL(startTimeList[0].second, startTimeList[1].second); From 2cb6eb3ee73366322bd404ac5e14f634fba95bc3 Mon Sep 17 00:00:00 2001 From: liqiang Date: Mon, 13 Jan 2025 08:03:30 +0000 Subject: [PATCH 13/48] update --- core/prometheus/Constants.h | 2 +- core/prometheus/schedulers/ScrapeScheduler.h | 3 +- .../schedulers/TargetSubscriberScheduler.cpp | 29 ++++++++++++------- 3 files changed, 22 insertions(+), 12 deletions(-) diff --git a/core/prometheus/Constants.h b/core/prometheus/Constants.h index b842797c4d..941f5f5c1c 100644 --- a/core/prometheus/Constants.h +++ b/core/prometheus/Constants.h @@ -101,7 +101,7 @@ const char* const METRICS_PATH_LABEL_NAME = "__metrics_path__"; const char* const PARAM_LABEL_NAME = "__param_"; const char* const LABELS = "labels"; const char* const TARGET_HASH = "hash"; -const char* const TARGET_IMMEDIATE = "target_immediate"; +const char* const REBALANCE_MS = "rebalance_ms"; // auto metrics const char* const SCRAPE_STATE = "scrape_state"; diff --git a/core/prometheus/schedulers/ScrapeScheduler.h b/core/prometheus/schedulers/ScrapeScheduler.h index 8b5081cb07..0232c24785 100644 --- a/core/prometheus/schedulers/ScrapeScheduler.h +++ b/core/prometheus/schedulers/ScrapeScheduler.h @@ -37,7 +37,7 @@ struct PromTargetInfo { std::string mInstance; std::string mHash; std::string mHashForOperator; - bool mImmediate = false; + uint64_t mRebalanceMs = 0; }; class ScrapeScheduler : public BaseScheduler { @@ -59,6 +59,7 @@ class ScrapeScheduler : public BaseScheduler { void SetComponent(std::shared_ptr timer, EventPool* eventPool); uint64_t GetLastScrapeSize() const { return mScrapeResponseSizeBytes; } + uint64_t GetReBalanceMs() const { return mTargetInfo.mRebalanceMs; } void ScheduleNext() override; void ScrapeOnce(std::chrono::steady_clock::time_point execTime); void Cancel() override; diff --git a/core/prometheus/schedulers/TargetSubscriberScheduler.cpp b/core/prometheus/schedulers/TargetSubscriberScheduler.cpp index 1c985208ed..0d63113c9d 100644 --- a/core/prometheus/schedulers/TargetSubscriberScheduler.cpp +++ b/core/prometheus/schedulers/TargetSubscriberScheduler.cpp @@ -26,7 +26,6 @@ #include "common/TimeUtil.h" #include "common/http/Constant.h" #include "common/timer/HttpRequestTimerEvent.h" -#include "common/timer/Timer.h" #include "logger/Logger.h" #include "monitor/metric_constants/MetricConstants.h" #include "prometheus/Constants.h" @@ -105,8 +104,10 @@ void TargetSubscriberScheduler::UpdateScrapeScheduler( } // save new scrape work + auto added = 0; for (const auto& [k, v] : newScrapeSchedulerMap) { if (mScrapeSchedulerMap.find(k) == mScrapeSchedulerMap.end()) { + added++; mScrapeSchedulerMap[k] = v; if (mTimer) { auto tmpCurrentMilliSeconds = GetCurrentTimeInMilliSeconds(); @@ -114,13 +115,20 @@ void TargetSubscriberScheduler::UpdateScrapeScheduler( v->GetId(), mScrapeConfigPtr->mScrapeIntervalSeconds, tmpCurrentMilliSeconds); // zero-cost upgrade - if (mUnRegisterMs > 0 - && (tmpCurrentMilliSeconds + tmpRandSleepMilliSec - - (uint64_t)mScrapeConfigPtr->mScrapeIntervalSeconds * 1000 - > mUnRegisterMs) - && (tmpCurrentMilliSeconds + tmpRandSleepMilliSec - - (uint64_t)mScrapeConfigPtr->mScrapeIntervalSeconds * 1000 * 2 - < mUnRegisterMs)) { + if ((mUnRegisterMs > 0 + && (tmpCurrentMilliSeconds + tmpRandSleepMilliSec + - (uint64_t)mScrapeConfigPtr->mScrapeIntervalSeconds * 1000 + > mUnRegisterMs) + && (tmpCurrentMilliSeconds + tmpRandSleepMilliSec + - (uint64_t)mScrapeConfigPtr->mScrapeIntervalSeconds * 1000 * 2 + < mUnRegisterMs)) + || (v->GetReBalanceMs() > 0 + && (tmpCurrentMilliSeconds + tmpRandSleepMilliSec + - (uint64_t)mScrapeConfigPtr->mScrapeIntervalSeconds * 1000 + > v->GetReBalanceMs()) + && (tmpCurrentMilliSeconds + tmpRandSleepMilliSec + - (uint64_t)mScrapeConfigPtr->mScrapeIntervalSeconds * 1000 * 2 + < v->GetReBalanceMs()))) { // scrape once just now LOG_INFO(sLogger, ("scrape zero cost", ToString(tmpCurrentMilliSeconds))); v->SetScrapeOnceTime(chrono::steady_clock::now(), chrono::system_clock::now()); @@ -129,6 +137,7 @@ void TargetSubscriberScheduler::UpdateScrapeScheduler( } } } + LOG_INFO(sLogger, ("prom targets removed", toRemove.size())("added", added)); } } @@ -199,8 +208,8 @@ bool TargetSubscriberScheduler::ParseScrapeSchedulerGroup(const std::string& con targetInfo.mHashForOperator = element[prometheus::TARGET_HASH].asString(); } - if (element.isMember(prometheus::TARGET_IMMEDIATE) && element[prometheus::TARGET_IMMEDIATE].isBool()) { - targetInfo.mImmediate = element[prometheus::TARGET_IMMEDIATE].asBool(); + if (element.isMember(prometheus::REBALANCE_MS) && element[prometheus::REBALANCE_MS].isUInt64()) { + targetInfo.mRebalanceMs = element[prometheus::REBALANCE_MS].asUInt64(); } scrapeSchedulerGroup.push_back(targetInfo); From 0874105770500ed2cf0a0842280d880b71bb4b98 Mon Sep 17 00:00:00 2001 From: liqiang Date: Mon, 13 Jan 2025 08:15:23 +0000 Subject: [PATCH 14/48] update --- core/prometheus/PrometheusInputRunner.h | 2 +- core/prometheus/schedulers/TargetSubscriberScheduler.cpp | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/core/prometheus/PrometheusInputRunner.h b/core/prometheus/PrometheusInputRunner.h index 3789e45e74..aa8f0863a6 100644 --- a/core/prometheus/PrometheusInputRunner.h +++ b/core/prometheus/PrometheusInputRunner.h @@ -84,7 +84,7 @@ class PrometheusInputRunner : public InputRunner { std::string mPodName; std::mutex mAgentInfoMutex; - PromAgentInfo mAgentInfo{0, 0, 0, 0}; + PromAgentInfo mAgentInfo{0, 0, 0, 0, 1}; std::chrono::steady_clock::time_point mLastUpdateTime; std::shared_ptr mTimer; diff --git a/core/prometheus/schedulers/TargetSubscriberScheduler.cpp b/core/prometheus/schedulers/TargetSubscriberScheduler.cpp index 0d63113c9d..6f440836d5 100644 --- a/core/prometheus/schedulers/TargetSubscriberScheduler.cpp +++ b/core/prometheus/schedulers/TargetSubscriberScheduler.cpp @@ -26,6 +26,7 @@ #include "common/TimeUtil.h" #include "common/http/Constant.h" #include "common/timer/HttpRequestTimerEvent.h" +#include "common/timer/Timer.h" #include "logger/Logger.h" #include "monitor/metric_constants/MetricConstants.h" #include "prometheus/Constants.h" From 60d1a82df9ce466c5512937f97ffdbe8159a9c27 Mon Sep 17 00:00:00 2001 From: liqiang Date: Tue, 14 Jan 2025 01:55:23 +0000 Subject: [PATCH 15/48] update --- core/prometheus/schedulers/TargetSubscriberScheduler.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/core/prometheus/schedulers/TargetSubscriberScheduler.cpp b/core/prometheus/schedulers/TargetSubscriberScheduler.cpp index 6f440836d5..e03350db72 100644 --- a/core/prometheus/schedulers/TargetSubscriberScheduler.cpp +++ b/core/prometheus/schedulers/TargetSubscriberScheduler.cpp @@ -26,7 +26,6 @@ #include "common/TimeUtil.h" #include "common/http/Constant.h" #include "common/timer/HttpRequestTimerEvent.h" -#include "common/timer/Timer.h" #include "logger/Logger.h" #include "monitor/metric_constants/MetricConstants.h" #include "prometheus/Constants.h" @@ -138,7 +137,7 @@ void TargetSubscriberScheduler::UpdateScrapeScheduler( } } } - LOG_INFO(sLogger, ("prom targets removed", toRemove.size())("added", added)); + LOG_INFO(sLogger, ("prom job", mJobName)("targets removed", toRemove.size())("added", added)); } } From 3c43d207e807f080490f6565323454607675a186 Mon Sep 17 00:00:00 2001 From: liqiang Date: Tue, 14 Jan 2025 05:24:13 +0000 Subject: [PATCH 16/48] feat: update health calc --- core/prometheus/Constants.h | 1 + core/prometheus/schedulers/ScrapeScheduler.cpp | 1 + core/prometheus/schedulers/ScrapeScheduler.h | 2 ++ core/prometheus/schedulers/TargetSubscriberScheduler.cpp | 4 ++++ 4 files changed, 8 insertions(+) diff --git a/core/prometheus/Constants.h b/core/prometheus/Constants.h index 941f5f5c1c..e826fb5553 100644 --- a/core/prometheus/Constants.h +++ b/core/prometheus/Constants.h @@ -130,5 +130,6 @@ const char* const MEM_USAGE = "MemUsage"; const char* const HASH = "Hash"; const char* const SIZE = "Size"; const char* const HEALTH = "Health"; +const char* const SCRAPE_DELAY_MS = "ScrapeDelayMs"; } // namespace logtail::prometheus diff --git a/core/prometheus/schedulers/ScrapeScheduler.cpp b/core/prometheus/schedulers/ScrapeScheduler.cpp index 950ebccd04..7048765056 100644 --- a/core/prometheus/schedulers/ScrapeScheduler.cpp +++ b/core/prometheus/schedulers/ScrapeScheduler.cpp @@ -124,6 +124,7 @@ void ScrapeScheduler::ScheduleNext() { return true; } this->DelayExecTime(1); + this->mExecDelayCount++; this->mPromDelayTotal->Add(1); this->ScheduleNext(); return false; diff --git a/core/prometheus/schedulers/ScrapeScheduler.h b/core/prometheus/schedulers/ScrapeScheduler.h index 0232c24785..b25adced02 100644 --- a/core/prometheus/schedulers/ScrapeScheduler.h +++ b/core/prometheus/schedulers/ScrapeScheduler.h @@ -41,6 +41,7 @@ struct PromTargetInfo { }; class ScrapeScheduler : public BaseScheduler { + friend class TargetSubscriberScheduler; public: ScrapeScheduler(std::shared_ptr scrapeConfigPtr, std::string host, @@ -69,6 +70,7 @@ class ScrapeScheduler : public BaseScheduler { std::unique_ptr BuildScrapeTimerEvent(std::chrono::steady_clock::time_point execTime); std::shared_ptr mScrapeConfigPtr; + std::atomic_int mExecDelayCount = 0; std::string mHost; int32_t mPort; PromTargetInfo mTargetInfo; diff --git a/core/prometheus/schedulers/TargetSubscriberScheduler.cpp b/core/prometheus/schedulers/TargetSubscriberScheduler.cpp index e03350db72..5e2ae61642 100644 --- a/core/prometheus/schedulers/TargetSubscriberScheduler.cpp +++ b/core/prometheus/schedulers/TargetSubscriberScheduler.cpp @@ -350,15 +350,19 @@ string TargetSubscriberScheduler::TargetsInfoToString() const { root[prometheus::AGENT_INFO][prometheus::MEM_USAGE] = agentInfo.mMemUsage; root[prometheus::AGENT_INFO][prometheus::MEM_LIMIT] = agentInfo.mMemLimit; root[prometheus::AGENT_INFO][prometheus::HEALTH] = agentInfo.mHealth; + int execDelayCountSec = 0; { ReadLock lock(mRWLock); for (const auto& [k, v] : mScrapeSchedulerMap) { Json::Value targetInfo; targetInfo[prometheus::HASH] = v->GetHashForOperator(); targetInfo[prometheus::SIZE] = v->GetLastScrapeSize(); + execDelayCountSec += v->mExecDelayCount; + v->mExecDelayCount = 0; root[prometheus::TARGETS_INFO].append(targetInfo); } } + root[prometheus::AGENT_INFO][prometheus::SCRAPE_DELAY_MS] = execDelayCountSec; return root.toStyledString(); } From 502b9d6813af24427d1cd6ac4cefb0219a43c851 Mon Sep 17 00:00:00 2001 From: liqiang Date: Tue, 14 Jan 2025 05:44:41 +0000 Subject: [PATCH 17/48] update --- core/prometheus/Constants.h | 2 +- .../schedulers/TargetSubscriberScheduler.cpp | 15 +++++++++++---- .../schedulers/TargetSubscriberScheduler.h | 2 ++ 3 files changed, 14 insertions(+), 5 deletions(-) diff --git a/core/prometheus/Constants.h b/core/prometheus/Constants.h index e826fb5553..ade211147b 100644 --- a/core/prometheus/Constants.h +++ b/core/prometheus/Constants.h @@ -130,6 +130,6 @@ const char* const MEM_USAGE = "MemUsage"; const char* const HASH = "Hash"; const char* const SIZE = "Size"; const char* const HEALTH = "Health"; -const char* const SCRAPE_DELAY_MS = "ScrapeDelayMs"; +const char* const SCRAPE_DELAY_SECONDS = "ScrapeDelaySeconds"; } // namespace logtail::prometheus diff --git a/core/prometheus/schedulers/TargetSubscriberScheduler.cpp b/core/prometheus/schedulers/TargetSubscriberScheduler.cpp index 5e2ae61642..60bb7a8a39 100644 --- a/core/prometheus/schedulers/TargetSubscriberScheduler.cpp +++ b/core/prometheus/schedulers/TargetSubscriberScheduler.cpp @@ -350,19 +350,26 @@ string TargetSubscriberScheduler::TargetsInfoToString() const { root[prometheus::AGENT_INFO][prometheus::MEM_USAGE] = agentInfo.mMemUsage; root[prometheus::AGENT_INFO][prometheus::MEM_LIMIT] = agentInfo.mMemLimit; root[prometheus::AGENT_INFO][prometheus::HEALTH] = agentInfo.mHealth; - int execDelayCountSec = 0; + int execDelayCountSec = -1; + auto curTime = std::chrono::steady_clock::now(); + auto needToUpdate = curTime - mLastUpdateTime >= std::chrono::seconds(prometheus::RefeshIntervalSeconds); + if (needToUpdate) { + execDelayCountSec = 0; + } { ReadLock lock(mRWLock); for (const auto& [k, v] : mScrapeSchedulerMap) { Json::Value targetInfo; targetInfo[prometheus::HASH] = v->GetHashForOperator(); targetInfo[prometheus::SIZE] = v->GetLastScrapeSize(); - execDelayCountSec += v->mExecDelayCount; - v->mExecDelayCount = 0; + if (needToUpdate) { + execDelayCountSec += v->mExecDelayCount; + v->mExecDelayCount = 0; + } root[prometheus::TARGETS_INFO].append(targetInfo); } } - root[prometheus::AGENT_INFO][prometheus::SCRAPE_DELAY_MS] = execDelayCountSec; + root[prometheus::AGENT_INFO][prometheus::SCRAPE_DELAY_SECONDS] = execDelayCountSec; return root.toStyledString(); } diff --git a/core/prometheus/schedulers/TargetSubscriberScheduler.h b/core/prometheus/schedulers/TargetSubscriberScheduler.h index 65e1a9a3b6..b475809e1d 100644 --- a/core/prometheus/schedulers/TargetSubscriberScheduler.h +++ b/core/prometheus/schedulers/TargetSubscriberScheduler.h @@ -81,6 +81,8 @@ class TargetSubscriberScheduler : public BaseScheduler { std::string mETag; + static std::chrono::steady_clock::time_point mLastUpdateTime; + // self monitor std::shared_ptr mSelfMonitor; MetricsRecordRef mMetricsRecordRef; From 6c9d94c9f8cb931b5298b4767b72e73b91c8227b Mon Sep 17 00:00:00 2001 From: liqiang Date: Tue, 14 Jan 2025 05:53:12 +0000 Subject: [PATCH 18/48] update --- core/prometheus/schedulers/TargetSubscriberScheduler.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/core/prometheus/schedulers/TargetSubscriberScheduler.cpp b/core/prometheus/schedulers/TargetSubscriberScheduler.cpp index 60bb7a8a39..d5db756d82 100644 --- a/core/prometheus/schedulers/TargetSubscriberScheduler.cpp +++ b/core/prometheus/schedulers/TargetSubscriberScheduler.cpp @@ -41,6 +41,7 @@ namespace logtail { TargetSubscriberScheduler::TargetSubscriberScheduler() : mQueueKey(0), mInputIndex(0), mServicePort(0), mUnRegisterMs(0) { + mLastUpdateTime = std::chrono::steady_clock::now(); } bool TargetSubscriberScheduler::Init(const Json::Value& scrapeConfig) { @@ -355,6 +356,7 @@ string TargetSubscriberScheduler::TargetsInfoToString() const { auto needToUpdate = curTime - mLastUpdateTime >= std::chrono::seconds(prometheus::RefeshIntervalSeconds); if (needToUpdate) { execDelayCountSec = 0; + mLastUpdateTime = curTime; } { ReadLock lock(mRWLock); From 1a63a128d065dee42c284bb67e3ea176e5973d57 Mon Sep 17 00:00:00 2001 From: liqiang Date: Tue, 14 Jan 2025 05:54:40 +0000 Subject: [PATCH 19/48] update --- core/prometheus/schedulers/TargetSubscriberScheduler.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/core/prometheus/schedulers/TargetSubscriberScheduler.cpp b/core/prometheus/schedulers/TargetSubscriberScheduler.cpp index d5db756d82..fc0b88b6e2 100644 --- a/core/prometheus/schedulers/TargetSubscriberScheduler.cpp +++ b/core/prometheus/schedulers/TargetSubscriberScheduler.cpp @@ -16,6 +16,7 @@ #include "prometheus/schedulers/TargetSubscriberScheduler.h" +#include #include #include From 9f1514320654dc52dc23ab4ee7c6eca6b08977e0 Mon Sep 17 00:00:00 2001 From: liqiang Date: Tue, 14 Jan 2025 06:01:53 +0000 Subject: [PATCH 20/48] update --- core/prometheus/schedulers/TargetSubscriberScheduler.cpp | 2 +- core/prometheus/schedulers/TargetSubscriberScheduler.h | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/core/prometheus/schedulers/TargetSubscriberScheduler.cpp b/core/prometheus/schedulers/TargetSubscriberScheduler.cpp index fc0b88b6e2..087e760b28 100644 --- a/core/prometheus/schedulers/TargetSubscriberScheduler.cpp +++ b/core/prometheus/schedulers/TargetSubscriberScheduler.cpp @@ -40,9 +40,9 @@ using namespace std; namespace logtail { +std::chrono::steady_clock::time_point TargetSubscriberScheduler::mLastUpdateTime = std::chrono::steady_clock::now(); TargetSubscriberScheduler::TargetSubscriberScheduler() : mQueueKey(0), mInputIndex(0), mServicePort(0), mUnRegisterMs(0) { - mLastUpdateTime = std::chrono::steady_clock::now(); } bool TargetSubscriberScheduler::Init(const Json::Value& scrapeConfig) { diff --git a/core/prometheus/schedulers/TargetSubscriberScheduler.h b/core/prometheus/schedulers/TargetSubscriberScheduler.h index b475809e1d..f0dcc79b0e 100644 --- a/core/prometheus/schedulers/TargetSubscriberScheduler.h +++ b/core/prometheus/schedulers/TargetSubscriberScheduler.h @@ -59,6 +59,7 @@ class TargetSubscriberScheduler : public BaseScheduler { // zero cost upgrade uint64_t mUnRegisterMs; + static std::chrono::steady_clock::time_point mLastUpdateTime; private: bool ParseScrapeSchedulerGroup(const std::string& content, std::vector& scrapeSchedulerGroup); @@ -81,8 +82,6 @@ class TargetSubscriberScheduler : public BaseScheduler { std::string mETag; - static std::chrono::steady_clock::time_point mLastUpdateTime; - // self monitor std::shared_ptr mSelfMonitor; MetricsRecordRef mMetricsRecordRef; From dbdc8c6e5cafc2329c183c6378561a33e50044ac Mon Sep 17 00:00:00 2001 From: liqiang Date: Tue, 14 Jan 2025 10:57:52 +0000 Subject: [PATCH 21/48] update --- core/prometheus/schedulers/TargetSubscriberScheduler.cpp | 8 ++++---- core/prometheus/schedulers/TargetSubscriberScheduler.h | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/core/prometheus/schedulers/TargetSubscriberScheduler.cpp b/core/prometheus/schedulers/TargetSubscriberScheduler.cpp index 087e760b28..ad9f7fc246 100644 --- a/core/prometheus/schedulers/TargetSubscriberScheduler.cpp +++ b/core/prometheus/schedulers/TargetSubscriberScheduler.cpp @@ -41,6 +41,7 @@ using namespace std; namespace logtail { std::chrono::steady_clock::time_point TargetSubscriberScheduler::mLastUpdateTime = std::chrono::steady_clock::now(); +uint64_t TargetSubscriberScheduler::mDelaySeconds = 0; TargetSubscriberScheduler::TargetSubscriberScheduler() : mQueueKey(0), mInputIndex(0), mServicePort(0), mUnRegisterMs(0) { } @@ -352,11 +353,10 @@ string TargetSubscriberScheduler::TargetsInfoToString() const { root[prometheus::AGENT_INFO][prometheus::MEM_USAGE] = agentInfo.mMemUsage; root[prometheus::AGENT_INFO][prometheus::MEM_LIMIT] = agentInfo.mMemLimit; root[prometheus::AGENT_INFO][prometheus::HEALTH] = agentInfo.mHealth; - int execDelayCountSec = -1; auto curTime = std::chrono::steady_clock::now(); auto needToUpdate = curTime - mLastUpdateTime >= std::chrono::seconds(prometheus::RefeshIntervalSeconds); if (needToUpdate) { - execDelayCountSec = 0; + mDelaySeconds = 0; mLastUpdateTime = curTime; } { @@ -366,13 +366,13 @@ string TargetSubscriberScheduler::TargetsInfoToString() const { targetInfo[prometheus::HASH] = v->GetHashForOperator(); targetInfo[prometheus::SIZE] = v->GetLastScrapeSize(); if (needToUpdate) { - execDelayCountSec += v->mExecDelayCount; + mDelaySeconds += v->mExecDelayCount; v->mExecDelayCount = 0; } root[prometheus::TARGETS_INFO].append(targetInfo); } } - root[prometheus::AGENT_INFO][prometheus::SCRAPE_DELAY_SECONDS] = execDelayCountSec; + root[prometheus::AGENT_INFO][prometheus::SCRAPE_DELAY_SECONDS] = mDelaySeconds; return root.toStyledString(); } diff --git a/core/prometheus/schedulers/TargetSubscriberScheduler.h b/core/prometheus/schedulers/TargetSubscriberScheduler.h index f0dcc79b0e..acaf086cf4 100644 --- a/core/prometheus/schedulers/TargetSubscriberScheduler.h +++ b/core/prometheus/schedulers/TargetSubscriberScheduler.h @@ -60,7 +60,7 @@ class TargetSubscriberScheduler : public BaseScheduler { // zero cost upgrade uint64_t mUnRegisterMs; static std::chrono::steady_clock::time_point mLastUpdateTime; - + static uint64_t mDelaySeconds; private: bool ParseScrapeSchedulerGroup(const std::string& content, std::vector& scrapeSchedulerGroup); From 7eea950bb1f9a78a4570f26b4b0e9b90d933baf0 Mon Sep 17 00:00:00 2001 From: liqiang Date: Tue, 14 Jan 2025 13:25:06 +0000 Subject: [PATCH 22/48] feat: add lc_target_hash --- .../inner/ProcessorPromRelabelMetricNative.cpp | 15 +++++++++++---- core/prometheus/Constants.h | 1 + core/prometheus/component/StreamScraper.cpp | 4 ++-- core/prometheus/schedulers/ScrapeScheduler.cpp | 4 ---- core/prometheus/schedulers/ScrapeScheduler.h | 2 -- .../schedulers/TargetSubscriberScheduler.cpp | 5 ++--- 6 files changed, 16 insertions(+), 15 deletions(-) diff --git a/core/plugin/processor/inner/ProcessorPromRelabelMetricNative.cpp b/core/plugin/processor/inner/ProcessorPromRelabelMetricNative.cpp index b93fb801e5..d7fb9d364a 100644 --- a/core/plugin/processor/inner/ProcessorPromRelabelMetricNative.cpp +++ b/core/plugin/processor/inner/ProcessorPromRelabelMetricNative.cpp @@ -16,8 +16,7 @@ #include "plugin/processor/inner/ProcessorPromRelabelMetricNative.h" #include - -#include "json/json.h" +#include #include "common/Flags.h" #include "common/StringTools.h" @@ -186,18 +185,26 @@ void ProcessorPromRelabelMetricNative::UpdateAutoMetrics(const PipelineEventGrou void ProcessorPromRelabelMetricNative::AddAutoMetrics(PipelineEventGroup& eGroup, const prom::AutoMetric& autoMetric) const { - auto targetTags = eGroup.GetTags(); if (!eGroup.HasMetadata(EventGroupMetaKey::PROMETHEUS_SCRAPE_TIMESTAMP_MILLISEC)) { LOG_ERROR(sLogger, ("scrape_timestamp_milliseconds is not set", "")); return; } + auto targetTags = eGroup.GetTags(); + auto toDelete = GetToDeleteTargetLabels(targetTags); + for (const auto& item : toDelete) { + targetTags.erase(item); + } + if (!eGroup.HasMetadata(EventGroupMetaKey::PROMETHEUS_STREAM_ID)) { + LOG_ERROR(sLogger, ("prometheus stream id", "")); + return; + } + targetTags[prometheus::LC_TARGET_HASH] = eGroup.GetMetadata(EventGroupMetaKey::PROMETHEUS_STREAM_ID); StringView scrapeTimestampMilliSecStr = eGroup.GetMetadata(EventGroupMetaKey::PROMETHEUS_SCRAPE_TIMESTAMP_MILLISEC); auto timestampMilliSec = StringTo(scrapeTimestampMilliSecStr.to_string()); auto timestamp = timestampMilliSec / 1000; auto nanoSec = timestampMilliSec % 1000 * 1000000; - AddMetric( eGroup, prometheus::SCRAPE_DURATION_SECONDS, autoMetric.mScrapeDurationSeconds, timestamp, nanoSec, targetTags); diff --git a/core/prometheus/Constants.h b/core/prometheus/Constants.h index ade211147b..c5f8ead7a8 100644 --- a/core/prometheus/Constants.h +++ b/core/prometheus/Constants.h @@ -13,6 +13,7 @@ const uint64_t RefeshIntervalSeconds = 5; const char* const META = "__meta_"; const char* const UNDEFINED = "undefined"; const std::string PROMETHEUS = "prometheus"; +const char* const LC_TARGET_HASH = "lc_target_hash"; // relabel config const char* const SOURCE_LABELS = "source_labels"; diff --git a/core/prometheus/component/StreamScraper.cpp b/core/prometheus/component/StreamScraper.cpp index 9dc9c178c0..4f6b46a7bc 100644 --- a/core/prometheus/component/StreamScraper.cpp +++ b/core/prometheus/component/StreamScraper.cpp @@ -102,7 +102,7 @@ void StreamScraper::PushEventGroup(PipelineEventGroup&& eGroup) const { void StreamScraper::SendMetrics() { mEventGroup.SetMetadata(EventGroupMetaKey::PROMETHEUS_SCRAPE_TIMESTAMP_MILLISEC, ToString(mScrapeTimestampMilliSec)); - mEventGroup.SetMetadata(EventGroupMetaKey::PROMETHEUS_STREAM_ID, GetId() + ToString(mScrapeTimestampMilliSec)); + mEventGroup.SetMetadata(EventGroupMetaKey::PROMETHEUS_STREAM_ID, GetId()); SetTargetLabels(mEventGroup); PushEventGroup(std::move(mEventGroup)); @@ -127,7 +127,7 @@ void StreamScraper::SetAutoMetricMeta(double scrapeDurationSeconds, bool upState mEventGroup.SetMetadata(EventGroupMetaKey::PROMETHEUS_SCRAPE_DURATION, ToString(scrapeDurationSeconds)); mEventGroup.SetMetadata(EventGroupMetaKey::PROMETHEUS_SCRAPE_RESPONSE_SIZE, ToString(mRawSize)); mEventGroup.SetMetadata(EventGroupMetaKey::PROMETHEUS_UP_STATE, ToString(upState)); - mEventGroup.SetMetadata(EventGroupMetaKey::PROMETHEUS_STREAM_ID, GetId() + ToString(mScrapeTimestampMilliSec)); + mEventGroup.SetMetadata(EventGroupMetaKey::PROMETHEUS_STREAM_ID, GetId()); mEventGroup.SetMetadata(EventGroupMetaKey::PROMETHEUS_STREAM_TOTAL, ToString(mStreamIndex)); } std::string StreamScraper::GetId() { diff --git a/core/prometheus/schedulers/ScrapeScheduler.cpp b/core/prometheus/schedulers/ScrapeScheduler.cpp index 7048765056..06fd3acba6 100644 --- a/core/prometheus/schedulers/ScrapeScheduler.cpp +++ b/core/prometheus/schedulers/ScrapeScheduler.cpp @@ -101,10 +101,6 @@ string ScrapeScheduler::GetId() const { return mTargetInfo.mHash; } -string ScrapeScheduler::GetHashForOperator() const { - return mTargetInfo.mHashForOperator; -} - void ScrapeScheduler::SetComponent(shared_ptr timer, EventPool* eventPool) { mTimer = std::move(timer); mEventPool = eventPool; diff --git a/core/prometheus/schedulers/ScrapeScheduler.h b/core/prometheus/schedulers/ScrapeScheduler.h index b25adced02..a674da3027 100644 --- a/core/prometheus/schedulers/ScrapeScheduler.h +++ b/core/prometheus/schedulers/ScrapeScheduler.h @@ -36,7 +36,6 @@ struct PromTargetInfo { Labels mLabels; std::string mInstance; std::string mHash; - std::string mHashForOperator; uint64_t mRebalanceMs = 0; }; @@ -55,7 +54,6 @@ class ScrapeScheduler : public BaseScheduler { void OnMetricResult(HttpResponse&, uint64_t timestampMilliSec); std::string GetId() const; - std::string GetHashForOperator() const; void SetComponent(std::shared_ptr timer, EventPool* eventPool); uint64_t GetLastScrapeSize() const { return mScrapeResponseSizeBytes; } diff --git a/core/prometheus/schedulers/TargetSubscriberScheduler.cpp b/core/prometheus/schedulers/TargetSubscriberScheduler.cpp index ad9f7fc246..109d415b69 100644 --- a/core/prometheus/schedulers/TargetSubscriberScheduler.cpp +++ b/core/prometheus/schedulers/TargetSubscriberScheduler.cpp @@ -190,7 +190,6 @@ bool TargetSubscriberScheduler::ParseScrapeSchedulerGroup(const std::string& con rawHashStream << std::setw(16) << std::setfill('0') << std::hex << labels.Hash(); string rawAddress = labels.Get(prometheus::ADDRESS_LABEL_NAME); targetInfo.mHash = mScrapeConfigPtr->mJobName + rawAddress + rawHashStream.str(); - targetInfo.mHashForOperator = targetInfo.mHash; targetInfo.mInstance = targets[0]; labels.Set(prometheus::JOB, mJobName); @@ -208,7 +207,7 @@ bool TargetSubscriberScheduler::ParseScrapeSchedulerGroup(const std::string& con targetInfo.mLabels = labels; if (element.isMember(prometheus::TARGET_HASH) && element[prometheus::TARGET_HASH].isString()) { - targetInfo.mHashForOperator = element[prometheus::TARGET_HASH].asString(); + targetInfo.mHash = element[prometheus::TARGET_HASH].asString(); } if (element.isMember(prometheus::REBALANCE_MS) && element[prometheus::REBALANCE_MS].isUInt64()) { @@ -363,7 +362,7 @@ string TargetSubscriberScheduler::TargetsInfoToString() const { ReadLock lock(mRWLock); for (const auto& [k, v] : mScrapeSchedulerMap) { Json::Value targetInfo; - targetInfo[prometheus::HASH] = v->GetHashForOperator(); + targetInfo[prometheus::HASH] = v->GetId(); targetInfo[prometheus::SIZE] = v->GetLastScrapeSize(); if (needToUpdate) { mDelaySeconds += v->mExecDelayCount; From 46cfaf4ed2a9a4a6cfe8789b5c730aa892a3e276 Mon Sep 17 00:00:00 2001 From: liqiang Date: Wed, 15 Jan 2025 01:45:03 +0000 Subject: [PATCH 23/48] update --- .../schedulers/TargetSubscriberScheduler.cpp | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/core/prometheus/schedulers/TargetSubscriberScheduler.cpp b/core/prometheus/schedulers/TargetSubscriberScheduler.cpp index 109d415b69..03698b591f 100644 --- a/core/prometheus/schedulers/TargetSubscriberScheduler.cpp +++ b/core/prometheus/schedulers/TargetSubscriberScheduler.cpp @@ -352,26 +352,24 @@ string TargetSubscriberScheduler::TargetsInfoToString() const { root[prometheus::AGENT_INFO][prometheus::MEM_USAGE] = agentInfo.mMemUsage; root[prometheus::AGENT_INFO][prometheus::MEM_LIMIT] = agentInfo.mMemLimit; root[prometheus::AGENT_INFO][prometheus::HEALTH] = agentInfo.mHealth; - auto curTime = std::chrono::steady_clock::now(); - auto needToUpdate = curTime - mLastUpdateTime >= std::chrono::seconds(prometheus::RefeshIntervalSeconds); - if (needToUpdate) { - mDelaySeconds = 0; - mLastUpdateTime = curTime; - } { ReadLock lock(mRWLock); for (const auto& [k, v] : mScrapeSchedulerMap) { Json::Value targetInfo; targetInfo[prometheus::HASH] = v->GetId(); targetInfo[prometheus::SIZE] = v->GetLastScrapeSize(); - if (needToUpdate) { - mDelaySeconds += v->mExecDelayCount; - v->mExecDelayCount = 0; - } + mDelaySeconds += v->mExecDelayCount; + v->mExecDelayCount = 0; root[prometheus::TARGETS_INFO].append(targetInfo); } } + auto curTime = std::chrono::steady_clock::now(); + auto needToClear = curTime - mLastUpdateTime >= std::chrono::seconds(prometheus::RefeshIntervalSeconds); root[prometheus::AGENT_INFO][prometheus::SCRAPE_DELAY_SECONDS] = mDelaySeconds; + if (needToClear) { + mDelaySeconds = 0; + mLastUpdateTime = curTime; + } return root.toStyledString(); } From 728a34d3c1a5e573ce0a8ed599c6eddd363f7790 Mon Sep 17 00:00:00 2001 From: liqiang Date: Wed, 15 Jan 2025 04:00:01 +0000 Subject: [PATCH 24/48] update --- core/prometheus/schedulers/TargetSubscriberScheduler.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/prometheus/schedulers/TargetSubscriberScheduler.cpp b/core/prometheus/schedulers/TargetSubscriberScheduler.cpp index 03698b591f..2218d719ff 100644 --- a/core/prometheus/schedulers/TargetSubscriberScheduler.cpp +++ b/core/prometheus/schedulers/TargetSubscriberScheduler.cpp @@ -222,9 +222,9 @@ bool TargetSubscriberScheduler::ParseScrapeSchedulerGroup(const std::string& con std::unordered_map> TargetSubscriberScheduler::BuildScrapeSchedulerSet(std::vector& targetGroups) { std::unordered_map> scrapeSchedulerMap; - for (const auto& targetInfo : targetGroups) { + for (auto& targetInfo : targetGroups) { // Relabel Config - Labels resultLabel = targetInfo.mLabels; + auto& resultLabel = targetInfo.mLabels; vector toDelete; if (!mScrapeConfigPtr->mRelabelConfigs.Process(resultLabel, toDelete)) { continue; From 4db3f5451964930994a75b5706a86cfd649e47a0 Mon Sep 17 00:00:00 2001 From: liqiang Date: Wed, 15 Jan 2025 08:41:29 +0000 Subject: [PATCH 25/48] prom job --- core/prometheus/schedulers/TargetSubscriberScheduler.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/core/prometheus/schedulers/TargetSubscriberScheduler.cpp b/core/prometheus/schedulers/TargetSubscriberScheduler.cpp index 2218d719ff..3e61fa9460 100644 --- a/core/prometheus/schedulers/TargetSubscriberScheduler.cpp +++ b/core/prometheus/schedulers/TargetSubscriberScheduler.cpp @@ -108,6 +108,7 @@ void TargetSubscriberScheduler::UpdateScrapeScheduler( // save new scrape work auto added = 0; + auto total = 0; for (const auto& [k, v] : newScrapeSchedulerMap) { if (mScrapeSchedulerMap.find(k) == mScrapeSchedulerMap.end()) { added++; @@ -140,7 +141,8 @@ void TargetSubscriberScheduler::UpdateScrapeScheduler( } } } - LOG_INFO(sLogger, ("prom job", mJobName)("targets removed", toRemove.size())("added", added)); + total = mScrapeSchedulerMap.size(); + LOG_INFO(sLogger, ("prom job", mJobName)("targets removed", toRemove.size())("added", added)("total", total)); } } From 0a6d8c8286877bb9ed9afdbd5955d0b54b84c91c Mon Sep 17 00:00:00 2001 From: liqiang Date: Wed, 15 Jan 2025 09:05:50 +0000 Subject: [PATCH 26/48] update --- core/prometheus/schedulers/ScrapeScheduler.cpp | 2 +- core/prometheus/schedulers/ScrapeScheduler.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/core/prometheus/schedulers/ScrapeScheduler.cpp b/core/prometheus/schedulers/ScrapeScheduler.cpp index 06fd3acba6..da6911a107 100644 --- a/core/prometheus/schedulers/ScrapeScheduler.cpp +++ b/core/prometheus/schedulers/ScrapeScheduler.cpp @@ -50,7 +50,7 @@ ScrapeScheduler::ScrapeScheduler(std::shared_ptr scrapeConfigPtr, mTargetInfo(targetInfo), mQueueKey(queueKey), mInputIndex(inputIndex), - mScrapeResponseSizeBytes(0) { + mScrapeResponseSizeBytes(-1) { mInterval = mScrapeConfigPtr->mScrapeIntervalSeconds; } diff --git a/core/prometheus/schedulers/ScrapeScheduler.h b/core/prometheus/schedulers/ScrapeScheduler.h index a674da3027..594ff96333 100644 --- a/core/prometheus/schedulers/ScrapeScheduler.h +++ b/core/prometheus/schedulers/ScrapeScheduler.h @@ -56,7 +56,7 @@ class ScrapeScheduler : public BaseScheduler { std::string GetId() const; void SetComponent(std::shared_ptr timer, EventPool* eventPool); - uint64_t GetLastScrapeSize() const { return mScrapeResponseSizeBytes; } + int64_t GetLastScrapeSize() const { return mScrapeResponseSizeBytes; } uint64_t GetReBalanceMs() const { return mTargetInfo.mRebalanceMs; } void ScheduleNext() override; @@ -78,7 +78,7 @@ class ScrapeScheduler : public BaseScheduler { size_t mInputIndex; // auto metrics - std::atomic_uint64_t mScrapeResponseSizeBytes; + std::atomic_int mScrapeResponseSizeBytes; // self monitor std::shared_ptr mSelfMonitor; From 7ff69db2dbc91d6b519c19623efa98498d944ef6 Mon Sep 17 00:00:00 2001 From: liqiang Date: Wed, 15 Jan 2025 10:01:26 +0000 Subject: [PATCH 27/48] update --- core/prometheus/PrometheusInputRunner.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/core/prometheus/PrometheusInputRunner.cpp b/core/prometheus/PrometheusInputRunner.cpp index 3f3845fefb..26b476e89b 100644 --- a/core/prometheus/PrometheusInputRunner.cpp +++ b/core/prometheus/PrometheusInputRunner.cpp @@ -178,6 +178,8 @@ void PrometheusInputRunner::Init() { mUnRegisterMs = 0; } else { mUnRegisterMs.store(StringTo(tmpStr)); + // adjust unRegisterMs to scrape targets for zero-cost + mUnRegisterMs -= 1000; LOG_INFO(sLogger, ("unRegisterMs", ToString(mUnRegisterMs))); } } From 58d713c20bf3fd7226d793ea3b8407d44669550d Mon Sep 17 00:00:00 2001 From: liqiang Date: Fri, 17 Jan 2025 02:02:17 +0000 Subject: [PATCH 28/48] chore: update ut --- .../prometheus/ScrapeSchedulerUnittest.cpp | 36 +++++++++++++++---- 1 file changed, 29 insertions(+), 7 deletions(-) diff --git a/core/unittest/prometheus/ScrapeSchedulerUnittest.cpp b/core/unittest/prometheus/ScrapeSchedulerUnittest.cpp index ee5be0ee00..7004016e97 100644 --- a/core/unittest/prometheus/ScrapeSchedulerUnittest.cpp +++ b/core/unittest/prometheus/ScrapeSchedulerUnittest.cpp @@ -24,6 +24,7 @@ #include "models/RawEvent.h" #include "prometheus/Constants.h" #include "prometheus/async/PromFuture.h" +#include "prometheus/component/StreamScraper.h" #include "prometheus/labels/Labels.h" #include "prometheus/schedulers/ScrapeConfig.h" #include "prometheus/schedulers/ScrapeScheduler.h" @@ -63,7 +64,10 @@ void ScrapeSchedulerUnittest::TestInitscrapeScheduler() { labels.Set(prometheus::ADDRESS_LABEL_NAME, "localhost:8080"); labels.Set("testb", "valueb"); labels.Set("testa", "localhost:8080"); - ScrapeScheduler event(mScrapeConfig, "localhost", 8080, labels, 0, 0); + PromTargetInfo targetInfo; + targetInfo.mLabels = labels; + targetInfo.mHash = "test_joblocalhost:8080887d0db7cce49fc7"; + ScrapeScheduler event(mScrapeConfig, "localhost", 8080, 0, 0, targetInfo); APSARA_TEST_EQUAL(event.GetId(), "test_joblocalhost:8080887d0db7cce49fc7"); } @@ -74,7 +78,10 @@ void ScrapeSchedulerUnittest::TestProcess() { Labels labels; labels.Set(prometheus::ADDRESS_LABEL_NAME, "localhost:8080"); labels.Set(prometheus::ADDRESS_LABEL_NAME, "localhost:8080"); - ScrapeScheduler event(mScrapeConfig, "localhost", 8080, labels, 0, 0); + PromTargetInfo targetInfo; + targetInfo.mLabels = labels; + targetInfo.mHash = "test_hash"; + ScrapeScheduler event(mScrapeConfig, "localhost", 8080, 0, 0, targetInfo); auto streamScraper = prom::StreamScraper(labels, 0, 0, event.GetId(), nullptr, std::chrono::system_clock::now()); HttpResponse httpResponse = HttpResponse(&streamScraper, [](void*) {}, prom::StreamScraper::MetricWriteCallback); auto defaultLabels = MetricLabels(); @@ -133,7 +140,10 @@ void ScrapeSchedulerUnittest::TestStreamMetricWriteCallback() { Labels labels; labels.Set(prometheus::ADDRESS_LABEL_NAME, "localhost:8080"); labels.Set(prometheus::ADDRESS_LABEL_NAME, "localhost:8080"); - ScrapeScheduler event(mScrapeConfig, "localhost", 8080, labels, 0, 0); + PromTargetInfo targetInfo; + targetInfo.mLabels = labels; + targetInfo.mHash = "test_hash"; + ScrapeScheduler event(mScrapeConfig, "localhost", 8080, 0, 0, targetInfo); auto streamScraper = prom::StreamScraper(labels, 0, 0, event.GetId(), nullptr, std::chrono::system_clock::now()); HttpResponse httpResponse = HttpResponse(&streamScraper, [](void*) {}, prom::StreamScraper::MetricWriteCallback); // APSARA_TEST_EQUAL(event.GetId(), "test_jobhttp://localhost:8080/metrics" + ToString(labels.Hash())); @@ -193,7 +203,10 @@ void ScrapeSchedulerUnittest::TestReceiveMessage() { Labels labels; labels.Set(prometheus::ADDRESS_LABEL_NAME, "localhost:8080"); labels.Set(prometheus::ADDRESS_LABEL_NAME, "localhost:8080"); - auto event = make_shared(mScrapeConfig, "localhost", 8080, labels, 0, 0); + PromTargetInfo targetInfo; + targetInfo.mLabels = labels; + targetInfo.mHash = "test_hash"; + auto event = make_shared(mScrapeConfig, "localhost", 8080, 0, 0, targetInfo); // before @@ -207,7 +220,10 @@ void ScrapeSchedulerUnittest::TestReceiveMessage() { void ScrapeSchedulerUnittest::TestScheduler() { Labels labels; labels.Set(prometheus::ADDRESS_LABEL_NAME, "localhost:8080"); - ScrapeScheduler event(mScrapeConfig, "localhost", 8080, labels, 0, 0); + PromTargetInfo targetInfo; + targetInfo.mLabels = labels; + targetInfo.mHash = "test_hash"; + ScrapeScheduler event(mScrapeConfig, "localhost", 8080, 0, 0, targetInfo); auto timer = make_shared(); EventPool eventPool{true}; event.SetComponent(timer, &eventPool); @@ -224,7 +240,10 @@ void ScrapeSchedulerUnittest::TestScheduler() { void ScrapeSchedulerUnittest::TestQueueIsFull() { Labels labels; labels.Set(prometheus::ADDRESS_LABEL_NAME, "localhost:8080"); - ScrapeScheduler event(mScrapeConfig, "localhost", 8080, labels, 0, 0); + PromTargetInfo targetInfo; + targetInfo.mLabels = labels; + targetInfo.mHash = "test_hash"; + ScrapeScheduler event(mScrapeConfig, "localhost", 8080, 0, 0, targetInfo); auto defaultLabels = MetricLabels(); event.InitSelfMonitor(defaultLabels); auto timer = make_shared(); @@ -250,7 +269,10 @@ void ScrapeSchedulerUnittest::TestQueueIsFull() { void ScrapeSchedulerUnittest::TestExactlyScrape() { Labels labels; labels.Set(prometheus::ADDRESS_LABEL_NAME, "localhost:8080"); - ScrapeScheduler event(mScrapeConfig, "localhost", 8080, labels, 0, 0); + PromTargetInfo targetInfo; + targetInfo.mLabels = labels; + targetInfo.mHash = "test_hash"; + ScrapeScheduler event(mScrapeConfig, "localhost", 8080, 0, 0, targetInfo); auto defaultLabels = MetricLabels(); event.InitSelfMonitor(defaultLabels); auto timer = make_shared(); From f89dc8c490b4ef08efceb3aff4affc785930d466 Mon Sep 17 00:00:00 2001 From: liqiang Date: Fri, 17 Jan 2025 03:05:01 +0000 Subject: [PATCH 29/48] chore: update ut --- .../processor/ProcessorPromRelabelMetricNativeUnittest.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/core/unittest/processor/ProcessorPromRelabelMetricNativeUnittest.cpp b/core/unittest/processor/ProcessorPromRelabelMetricNativeUnittest.cpp index 06a0668aa8..5ef7ea3722 100644 --- a/core/unittest/processor/ProcessorPromRelabelMetricNativeUnittest.cpp +++ b/core/unittest/processor/ProcessorPromRelabelMetricNativeUnittest.cpp @@ -215,6 +215,7 @@ test_metric8{k1="v1", k3="v2", } 9.9410452992e+10 1715829785083 eventGroup.SetMetadata(EventGroupMetaKey::PROMETHEUS_SCRAPE_RESPONSE_SIZE, ToString(2325)); eventGroup.SetMetadata(EventGroupMetaKey::PROMETHEUS_UP_STATE, ToString(true)); eventGroup.SetMetadata(EventGroupMetaKey::PROMETHEUS_SCRAPE_STATE, string("OK")); + eventGroup.SetMetadata(EventGroupMetaKey::PROMETHEUS_STREAM_ID, string("123")); eventGroup.SetTag(string("instance"), "localhost:8080"); eventGroup.SetTag(string("job"), "test_job"); processor.UpdateAutoMetrics(eventGroup, autoMetric); @@ -235,6 +236,7 @@ test_metric8{k1="v1", k3="v2", } 9.9410452992e+10 1715829785083 APSARA_TEST_EQUAL(1, eventGroup.GetEvents().at(14).Cast().GetValue()->mValue); APSARA_TEST_EQUAL("localhost:8080", eventGroup.GetEvents().at(14).Cast().GetTag("instance")); APSARA_TEST_EQUAL("test_job", eventGroup.GetEvents().at(14).Cast().GetTag("job")); + APSARA_TEST_EQUAL("123", eventGroup.GetEvents().at(14).Cast().GetTag("lc_target_hash")); } void ProcessorPromRelabelMetricNativeUnittest::TestHonorLabels() { From 574fd92e2f4950d61ac4e312006598711ca66202 Mon Sep 17 00:00:00 2001 From: liqiang Date: Fri, 17 Jan 2025 03:55:49 +0000 Subject: [PATCH 30/48] update --- core/prometheus/schedulers/BaseScheduler.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/core/prometheus/schedulers/BaseScheduler.cpp b/core/prometheus/schedulers/BaseScheduler.cpp index 4be34c38b3..35fbc2d6ae 100644 --- a/core/prometheus/schedulers/BaseScheduler.cpp +++ b/core/prometheus/schedulers/BaseScheduler.cpp @@ -8,6 +8,10 @@ using namespace std; namespace logtail { void BaseScheduler::ExecDone() { mExecCount++; + while (mLatestExecTime < mFirstExecTime + chrono::seconds(mExecCount * mInterval)) { + mExecCount++; + } + mLatestExecTime = mFirstExecTime + chrono::seconds(mExecCount * mInterval); mLatestScrapeTime = mFirstScrapeTime + chrono::seconds(mExecCount * mInterval); } From 640729b8be83b1e971d4d4e20aea10369e47aa6e Mon Sep 17 00:00:00 2001 From: liqiang Date: Fri, 17 Jan 2025 04:11:13 +0000 Subject: [PATCH 31/48] update --- core/prometheus/schedulers/BaseScheduler.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/prometheus/schedulers/BaseScheduler.cpp b/core/prometheus/schedulers/BaseScheduler.cpp index 35fbc2d6ae..406ecb98f0 100644 --- a/core/prometheus/schedulers/BaseScheduler.cpp +++ b/core/prometheus/schedulers/BaseScheduler.cpp @@ -8,7 +8,7 @@ using namespace std; namespace logtail { void BaseScheduler::ExecDone() { mExecCount++; - while (mLatestExecTime < mFirstExecTime + chrono::seconds(mExecCount * mInterval)) { + while (mLatestExecTime > mFirstExecTime + chrono::seconds(mExecCount * mInterval)) { mExecCount++; } From b67091a09e9734d6b4bc8e1abea2cdf5c311085e Mon Sep 17 00:00:00 2001 From: liqiang Date: Thu, 23 Jan 2025 09:23:35 +0000 Subject: [PATCH 32/48] chore: update code style --- core/prometheus/schedulers/ScrapeScheduler.h | 1 + .../schedulers/TargetSubscriberScheduler.cpp | 8 +++---- .../schedulers/TargetSubscriberScheduler.h | 2 +- core/unittest/prometheus/LabelsUnittest.cpp | 21 ------------------- 4 files changed, 6 insertions(+), 26 deletions(-) diff --git a/core/prometheus/schedulers/ScrapeScheduler.h b/core/prometheus/schedulers/ScrapeScheduler.h index 594ff96333..ffea395c1b 100644 --- a/core/prometheus/schedulers/ScrapeScheduler.h +++ b/core/prometheus/schedulers/ScrapeScheduler.h @@ -41,6 +41,7 @@ struct PromTargetInfo { class ScrapeScheduler : public BaseScheduler { friend class TargetSubscriberScheduler; + public: ScrapeScheduler(std::shared_ptr scrapeConfigPtr, std::string host, diff --git a/core/prometheus/schedulers/TargetSubscriberScheduler.cpp b/core/prometheus/schedulers/TargetSubscriberScheduler.cpp index 3e61fa9460..5fbfee83d8 100644 --- a/core/prometheus/schedulers/TargetSubscriberScheduler.cpp +++ b/core/prometheus/schedulers/TargetSubscriberScheduler.cpp @@ -41,7 +41,7 @@ using namespace std; namespace logtail { std::chrono::steady_clock::time_point TargetSubscriberScheduler::mLastUpdateTime = std::chrono::steady_clock::now(); -uint64_t TargetSubscriberScheduler::mDelaySeconds = 0; +uint64_t TargetSubscriberScheduler::sDelaySeconds = 0; TargetSubscriberScheduler::TargetSubscriberScheduler() : mQueueKey(0), mInputIndex(0), mServicePort(0), mUnRegisterMs(0) { } @@ -360,16 +360,16 @@ string TargetSubscriberScheduler::TargetsInfoToString() const { Json::Value targetInfo; targetInfo[prometheus::HASH] = v->GetId(); targetInfo[prometheus::SIZE] = v->GetLastScrapeSize(); - mDelaySeconds += v->mExecDelayCount; + sDelaySeconds += v->mExecDelayCount; v->mExecDelayCount = 0; root[prometheus::TARGETS_INFO].append(targetInfo); } } auto curTime = std::chrono::steady_clock::now(); auto needToClear = curTime - mLastUpdateTime >= std::chrono::seconds(prometheus::RefeshIntervalSeconds); - root[prometheus::AGENT_INFO][prometheus::SCRAPE_DELAY_SECONDS] = mDelaySeconds; + root[prometheus::AGENT_INFO][prometheus::SCRAPE_DELAY_SECONDS] = sDelaySeconds; if (needToClear) { - mDelaySeconds = 0; + sDelaySeconds = 0; mLastUpdateTime = curTime; } return root.toStyledString(); diff --git a/core/prometheus/schedulers/TargetSubscriberScheduler.h b/core/prometheus/schedulers/TargetSubscriberScheduler.h index acaf086cf4..950078822b 100644 --- a/core/prometheus/schedulers/TargetSubscriberScheduler.h +++ b/core/prometheus/schedulers/TargetSubscriberScheduler.h @@ -60,7 +60,7 @@ class TargetSubscriberScheduler : public BaseScheduler { // zero cost upgrade uint64_t mUnRegisterMs; static std::chrono::steady_clock::time_point mLastUpdateTime; - static uint64_t mDelaySeconds; + static uint64_t sDelaySeconds; private: bool ParseScrapeSchedulerGroup(const std::string& content, std::vector& scrapeSchedulerGroup); diff --git a/core/unittest/prometheus/LabelsUnittest.cpp b/core/unittest/prometheus/LabelsUnittest.cpp index cccc0dc109..0f84ca1698 100644 --- a/core/unittest/prometheus/LabelsUnittest.cpp +++ b/core/unittest/prometheus/LabelsUnittest.cpp @@ -58,26 +58,6 @@ void LabelsUnittest::TestRemoveMetaLabels() { APSARA_TEST_EQUAL("", labels.Get("__meta_port")); } -void LabelsUnittest::TestHash() { - Labels labels; - - labels.Set("host", "172.17.0.3:9100"); - labels.Set("ip", "172.17.0.3"); - labels.Set("port", "9100"); - uint64_t hash = labels.Hash(); - - uint64_t expect = prometheus::OFFSET64; - string raw; - raw = raw + "host" + "\xff" + "172.17.0.3:9100" + "\xff" + "ip" + "\xff" + "172.17.0.3" + "\xff" + "port" + "\xff" - + "9100" + "\xff"; - for (auto i : raw) { - expect ^= (uint64_t)i; - expect *= prometheus::PRIME64; - } - - APSARA_TEST_EQUAL(expect, hash); -} - void LabelsUnittest::TestGet() { Labels labels; labels.Set("host", "172.17.0.3:9100"); @@ -117,7 +97,6 @@ void LabelsUnittest::TestRange() { UNIT_TEST_CASE(LabelsUnittest, TestGet) UNIT_TEST_CASE(LabelsUnittest, TestSet) UNIT_TEST_CASE(LabelsUnittest, TestRange) -UNIT_TEST_CASE(LabelsUnittest, TestHash) UNIT_TEST_CASE(LabelsUnittest, TestRemoveMetaLabels) From b24f0f6e91bc540c0bba10901b5110eab752d01f Mon Sep 17 00:00:00 2001 From: liqiang Date: Thu, 23 Jan 2025 09:32:19 +0000 Subject: [PATCH 33/48] update code style --- core/prometheus/schedulers/TargetSubscriberScheduler.h | 1 + 1 file changed, 1 insertion(+) diff --git a/core/prometheus/schedulers/TargetSubscriberScheduler.h b/core/prometheus/schedulers/TargetSubscriberScheduler.h index ca0d8e0523..3b31c930aa 100644 --- a/core/prometheus/schedulers/TargetSubscriberScheduler.h +++ b/core/prometheus/schedulers/TargetSubscriberScheduler.h @@ -62,6 +62,7 @@ class TargetSubscriberScheduler : public BaseScheduler { uint64_t mUnRegisterMs; static std::chrono::steady_clock::time_point mLastUpdateTime; static uint64_t sDelaySeconds; + private: bool ParseScrapeSchedulerGroup(const std::string& content, std::vector& scrapeSchedulerGroup); From bcb3c42b8fadccaea0d14ba4b801e75aee419de2 Mon Sep 17 00:00:00 2001 From: liqiang Date: Thu, 23 Jan 2025 09:43:30 +0000 Subject: [PATCH 34/48] update code style --- core/prometheus/PrometheusInputRunner.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/prometheus/PrometheusInputRunner.cpp b/core/prometheus/PrometheusInputRunner.cpp index cf8b2cbbbb..8856211051 100644 --- a/core/prometheus/PrometheusInputRunner.cpp +++ b/core/prometheus/PrometheusInputRunner.cpp @@ -33,7 +33,7 @@ #include "logger/Logger.h" #include "monitor/Monitor.h" #include "monitor/metric_constants/MetricConstants.h" -#include "pipeline/queue/ProcessQueueManager.h" +#include "collection_pipeline/queue/ProcessQueueManager.h" #include "plugin/flusher/sls/FlusherSLS.h" #include "prometheus/Constants.h" #include "prometheus/Utils.h" From 123ff49fdec3693c8d5673897b70dd5fb492b8d4 Mon Sep 17 00:00:00 2001 From: liqiang Date: Thu, 23 Jan 2025 09:51:06 +0000 Subject: [PATCH 35/48] update code style --- core/prometheus/PrometheusInputRunner.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/prometheus/PrometheusInputRunner.cpp b/core/prometheus/PrometheusInputRunner.cpp index 8856211051..866ff4c8a0 100644 --- a/core/prometheus/PrometheusInputRunner.cpp +++ b/core/prometheus/PrometheusInputRunner.cpp @@ -22,6 +22,7 @@ #include #include "application/Application.h" +#include "collection_pipeline/queue/ProcessQueueManager.h" #include "common/Flags.h" #include "common/JsonUtil.h" #include "common/StringTools.h" @@ -33,7 +34,6 @@ #include "logger/Logger.h" #include "monitor/Monitor.h" #include "monitor/metric_constants/MetricConstants.h" -#include "collection_pipeline/queue/ProcessQueueManager.h" #include "plugin/flusher/sls/FlusherSLS.h" #include "prometheus/Constants.h" #include "prometheus/Utils.h" From ecb992daa7b439344ab3c139fceccc4eb5f4c911 Mon Sep 17 00:00:00 2001 From: liqiang Date: Fri, 24 Jan 2025 03:12:22 +0000 Subject: [PATCH 36/48] chore: update --- core/prometheus/Constants.h | 1 - core/prometheus/PrometheusInputRunner.cpp | 14 -------------- core/prometheus/PrometheusInputRunner.h | 3 +-- .../schedulers/TargetSubscriberScheduler.cpp | 1 - 4 files changed, 1 insertion(+), 18 deletions(-) diff --git a/core/prometheus/Constants.h b/core/prometheus/Constants.h index c5f8ead7a8..2f2d9c7ead 100644 --- a/core/prometheus/Constants.h +++ b/core/prometheus/Constants.h @@ -130,7 +130,6 @@ const char* const MEM_LIMIT = "MemLimit"; const char* const MEM_USAGE = "MemUsage"; const char* const HASH = "Hash"; const char* const SIZE = "Size"; -const char* const HEALTH = "Health"; const char* const SCRAPE_DELAY_SECONDS = "ScrapeDelaySeconds"; } // namespace logtail::prometheus diff --git a/core/prometheus/PrometheusInputRunner.cpp b/core/prometheus/PrometheusInputRunner.cpp index 866ff4c8a0..4526fd0dfc 100644 --- a/core/prometheus/PrometheusInputRunner.cpp +++ b/core/prometheus/PrometheusInputRunner.cpp @@ -324,20 +324,6 @@ PromAgentInfo PrometheusInputRunner::GetAgentInfo() { } } } - mAgentInfo.mHealth = 0; - if (mAgentInfo.mCpuLimit > 0.0) { - mAgentInfo.mHealth += (1 - mAgentInfo.mCpuUsage / mAgentInfo.mCpuLimit); - } - if (mAgentInfo.mMemLimit > 0.0) { - mAgentInfo.mHealth += (1 - mAgentInfo.mMemUsage / mAgentInfo.mMemLimit); - } - if (queueNums > 0) { - mAgentInfo.mHealth += (1.0 * validToPushNums / queueNums); - } else { - mAgentInfo.mHealth += 1; - } - - mAgentInfo.mHealth /= 3; } return mAgentInfo; diff --git a/core/prometheus/PrometheusInputRunner.h b/core/prometheus/PrometheusInputRunner.h index aa8f0863a6..c9aa664a42 100644 --- a/core/prometheus/PrometheusInputRunner.h +++ b/core/prometheus/PrometheusInputRunner.h @@ -35,7 +35,6 @@ struct PromAgentInfo { float mMemUsage; float mCpuLimit; float mMemLimit; - float mHealth; }; class PrometheusInputRunner : public InputRunner { @@ -84,7 +83,7 @@ class PrometheusInputRunner : public InputRunner { std::string mPodName; std::mutex mAgentInfoMutex; - PromAgentInfo mAgentInfo{0, 0, 0, 0, 1}; + PromAgentInfo mAgentInfo{0.0F, 0.0F, 0.0F, 0.0F}; std::chrono::steady_clock::time_point mLastUpdateTime; std::shared_ptr mTimer; diff --git a/core/prometheus/schedulers/TargetSubscriberScheduler.cpp b/core/prometheus/schedulers/TargetSubscriberScheduler.cpp index 5fbfee83d8..b67b45235b 100644 --- a/core/prometheus/schedulers/TargetSubscriberScheduler.cpp +++ b/core/prometheus/schedulers/TargetSubscriberScheduler.cpp @@ -353,7 +353,6 @@ string TargetSubscriberScheduler::TargetsInfoToString() const { root[prometheus::AGENT_INFO][prometheus::CPU_LIMIT] = agentInfo.mCpuLimit; root[prometheus::AGENT_INFO][prometheus::MEM_USAGE] = agentInfo.mMemUsage; root[prometheus::AGENT_INFO][prometheus::MEM_LIMIT] = agentInfo.mMemLimit; - root[prometheus::AGENT_INFO][prometheus::HEALTH] = agentInfo.mHealth; { ReadLock lock(mRWLock); for (const auto& [k, v] : mScrapeSchedulerMap) { From 3c61cf753f3d66904be46c2c0f482688c8e149bd Mon Sep 17 00:00:00 2001 From: liqiang Date: Fri, 24 Jan 2025 06:22:52 +0000 Subject: [PATCH 37/48] chore: update --- core/monitor/Monitor.h | 4 +-- core/prometheus/PrometheusInputRunner.cpp | 33 +++---------------- core/prometheus/PrometheusInputRunner.h | 6 ++-- .../schedulers/TargetSubscriberScheduler.cpp | 4 ++- 4 files changed, 12 insertions(+), 35 deletions(-) diff --git a/core/monitor/Monitor.h b/core/monitor/Monitor.h index 9e851a2390..ea4a5e452f 100644 --- a/core/monitor/Monitor.h +++ b/core/monitor/Monitor.h @@ -92,7 +92,7 @@ class LogtailMonitor { // LogInput use it to do flow control. float GetRealtimeCpuLevel() { return mRealtimeCpuStat.mCpuUsage / mScaledCpuUsageUpLimit; } [[nodiscard]] float GetCpuUsage() const { return mCpuUsage.load(); } - [[nodiscard]] float GetMemoryUsage() const { return mMemoryUsage.load(); } + [[nodiscard]] int64_t GetMemoryUsage() const { return mMemoryUsage.load(); } private: LogtailMonitor(); @@ -163,7 +163,7 @@ class LogtailMonitor { MemStat mMemStat; std::atomic mCpuUsage = 0; - std::atomic mMemoryUsage = 0; + std::atomic mMemoryUsage = 0; // Current scale up level, updated by CheckScaledCpuUsageUpLimit. float mScaledCpuUsageUpLimit; diff --git a/core/prometheus/PrometheusInputRunner.cpp b/core/prometheus/PrometheusInputRunner.cpp index 4526fd0dfc..2f830a7b97 100644 --- a/core/prometheus/PrometheusInputRunner.cpp +++ b/core/prometheus/PrometheusInputRunner.cpp @@ -299,33 +299,10 @@ void PrometheusInputRunner::CheckGC() { mEventPool.CheckGC(); } -PromAgentInfo PrometheusInputRunner::GetAgentInfo() { - std::lock_guard lock(mAgentInfoMutex); - auto curTime = std::chrono::steady_clock::now(); -#ifdef APSARA_UNIT_TEST_MAIN - curTime += std::chrono::seconds(prometheus::RefeshIntervalSeconds); -#endif - if (curTime - mLastUpdateTime >= std::chrono::seconds(prometheus::RefeshIntervalSeconds)) { - mLastUpdateTime = curTime; - mAgentInfo.mCpuUsage = LogtailMonitor::GetInstance()->GetCpuUsage(); - mAgentInfo.mMemUsage = LogtailMonitor::GetInstance()->GetMemoryUsage(); - mAgentInfo.mCpuLimit = AppConfig::GetInstance()->GetCpuUsageUpLimit(); - mAgentInfo.mMemLimit = AppConfig::GetInstance()->GetMemUsageUpLimit(); - - int queueNums = 0; - int validToPushNums = 0; - - { - ReadLock lock(mSubscriberMapRWLock); - queueNums = mTargetSubscriberSchedulerMap.size(); - for (auto& [k, v] : mTargetSubscriberSchedulerMap) { - if (ProcessQueueManager::GetInstance()->IsValidToPush(v->mQueueKey)) { - validToPushNums++; - } - } - } - } - - return mAgentInfo; +void PrometheusInputRunner::GetAgentInfo(PromAgentInfo& agentInfo) { + agentInfo.mCpuUsage = LogtailMonitor::GetInstance()->GetCpuUsage(); + agentInfo.mMemUsage = LogtailMonitor::GetInstance()->GetMemoryUsage(); + agentInfo.mCpuLimit = AppConfig::GetInstance()->GetCpuUsageUpLimit(); + agentInfo.mMemLimit = AppConfig::GetInstance()->GetMemUsageUpLimit(); } }; // namespace logtail diff --git a/core/prometheus/PrometheusInputRunner.h b/core/prometheus/PrometheusInputRunner.h index c9aa664a42..8a31f68f28 100644 --- a/core/prometheus/PrometheusInputRunner.h +++ b/core/prometheus/PrometheusInputRunner.h @@ -32,7 +32,7 @@ namespace logtail { struct PromAgentInfo { float mCpuUsage; - float mMemUsage; + int64_t mMemUsage; float mCpuLimit; float mMemLimit; }; @@ -49,7 +49,7 @@ class PrometheusInputRunner : public InputRunner { return &sInstance; } void CheckGC(); - PromAgentInfo GetAgentInfo(); + void GetAgentInfo(PromAgentInfo &agentInfo); // input plugin update void UpdateScrapeInput(std::shared_ptr targetSubscriber, @@ -82,8 +82,6 @@ class PrometheusInputRunner : public InputRunner { int32_t mServicePort; std::string mPodName; - std::mutex mAgentInfoMutex; - PromAgentInfo mAgentInfo{0.0F, 0.0F, 0.0F, 0.0F}; std::chrono::steady_clock::time_point mLastUpdateTime; std::shared_ptr mTimer; diff --git a/core/prometheus/schedulers/TargetSubscriberScheduler.cpp b/core/prometheus/schedulers/TargetSubscriberScheduler.cpp index b67b45235b..e0cc0f1e6d 100644 --- a/core/prometheus/schedulers/TargetSubscriberScheduler.cpp +++ b/core/prometheus/schedulers/TargetSubscriberScheduler.cpp @@ -348,7 +348,9 @@ TargetSubscriberScheduler::BuildSubscriberTimerEvent(std::chrono::steady_clock:: string TargetSubscriberScheduler::TargetsInfoToString() const { Json::Value root; - auto agentInfo = PrometheusInputRunner::GetInstance()->GetAgentInfo(); + PromAgentInfo agentInfo{0.0F, 0, 0.0F, 0.0F}; + + PrometheusInputRunner::GetInstance()->GetAgentInfo(agentInfo); root[prometheus::AGENT_INFO][prometheus::CPU_USAGE] = agentInfo.mCpuUsage; root[prometheus::AGENT_INFO][prometheus::CPU_LIMIT] = agentInfo.mCpuLimit; root[prometheus::AGENT_INFO][prometheus::MEM_USAGE] = agentInfo.mMemUsage; From 3b82b6df3cfa84438ba0723ded4b19905e534589 Mon Sep 17 00:00:00 2001 From: liqiang Date: Fri, 24 Jan 2025 06:28:27 +0000 Subject: [PATCH 38/48] chore: update code style --- core/prometheus/PrometheusInputRunner.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/prometheus/PrometheusInputRunner.h b/core/prometheus/PrometheusInputRunner.h index 8a31f68f28..2ff4330b61 100644 --- a/core/prometheus/PrometheusInputRunner.h +++ b/core/prometheus/PrometheusInputRunner.h @@ -49,7 +49,7 @@ class PrometheusInputRunner : public InputRunner { return &sInstance; } void CheckGC(); - void GetAgentInfo(PromAgentInfo &agentInfo); + void GetAgentInfo(PromAgentInfo& agentInfo); // input plugin update void UpdateScrapeInput(std::shared_ptr targetSubscriber, From 8e02f7496fb5e55e75f89c8cb7afd49d3bba067c Mon Sep 17 00:00:00 2001 From: liqiang Date: Tue, 11 Feb 2025 08:27:31 +0000 Subject: [PATCH 39/48] chore: remove health ut --- core/unittest/prometheus/TargetSubscriberSchedulerUnittest.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/core/unittest/prometheus/TargetSubscriberSchedulerUnittest.cpp b/core/unittest/prometheus/TargetSubscriberSchedulerUnittest.cpp index 4e4d84b6f7..fc93b100ac 100644 --- a/core/unittest/prometheus/TargetSubscriberSchedulerUnittest.cpp +++ b/core/unittest/prometheus/TargetSubscriberSchedulerUnittest.cpp @@ -343,7 +343,6 @@ void TargetSubscriberSchedulerUnittest::TestTargetsInfoToString() { ParseJsonTable(res, data, errorMsg); APSARA_TEST_EQUAL(2.0, data[prometheus::AGENT_INFO][prometheus::CPU_LIMIT].asFloat()); APSARA_TEST_EQUAL((uint64_t)3, data[prometheus::TARGETS_INFO].size()); - APSARA_TEST_EQUAL(true, data[prometheus::AGENT_INFO][prometheus::HEALTH].asFloat() > 0.6); } UNIT_TEST_CASE(TargetSubscriberSchedulerUnittest, OnInitScrapeJobEvent) From 6d508cd38002bc79719655bb92ecbb885bf18b3c Mon Sep 17 00:00:00 2001 From: liqiang Date: Wed, 19 Feb 2025 09:56:06 +0000 Subject: [PATCH 40/48] chore: update --- core/prometheus/PrometheusInputRunner.h | 2 +- core/prometheus/schedulers/TargetSubscriberScheduler.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/core/prometheus/PrometheusInputRunner.h b/core/prometheus/PrometheusInputRunner.h index 2ff4330b61..b0b3488c32 100644 --- a/core/prometheus/PrometheusInputRunner.h +++ b/core/prometheus/PrometheusInputRunner.h @@ -34,7 +34,7 @@ struct PromAgentInfo { float mCpuUsage; int64_t mMemUsage; float mCpuLimit; - float mMemLimit; + int64_t mMemLimit; }; class PrometheusInputRunner : public InputRunner { diff --git a/core/prometheus/schedulers/TargetSubscriberScheduler.cpp b/core/prometheus/schedulers/TargetSubscriberScheduler.cpp index 2af99b549c..ecc608587d 100644 --- a/core/prometheus/schedulers/TargetSubscriberScheduler.cpp +++ b/core/prometheus/schedulers/TargetSubscriberScheduler.cpp @@ -348,7 +348,7 @@ TargetSubscriberScheduler::BuildSubscriberTimerEvent(std::chrono::steady_clock:: string TargetSubscriberScheduler::TargetsInfoToString() const { Json::Value root; - PromAgentInfo agentInfo{0.0F, 0, 0.0F, 0.0F}; + PromAgentInfo agentInfo{0.0F, 0, 0.0F, 0}; PrometheusInputRunner::GetInstance()->GetAgentInfo(agentInfo); root[prometheus::AGENT_INFO][prometheus::CPU_USAGE] = agentInfo.mCpuUsage; From ccb5f20e2df4a7e70915f8f34f768f886366d355 Mon Sep 17 00:00:00 2001 From: liqiang Date: Tue, 25 Feb 2025 07:20:01 +0000 Subject: [PATCH 41/48] chore: fix ut --- core/unittest/prometheus/TargetSubscriberSchedulerUnittest.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/core/unittest/prometheus/TargetSubscriberSchedulerUnittest.cpp b/core/unittest/prometheus/TargetSubscriberSchedulerUnittest.cpp index 9583befc42..a06f023aeb 100644 --- a/core/unittest/prometheus/TargetSubscriberSchedulerUnittest.cpp +++ b/core/unittest/prometheus/TargetSubscriberSchedulerUnittest.cpp @@ -20,6 +20,7 @@ #include #include +#include "ScrapeScheduler.h" #include "common/JsonUtil.h" #include "prometheus/Constants.h" #include "prometheus/labels/Labels.h" @@ -339,7 +340,7 @@ void TargetSubscriberSchedulerUnittest::TestTargetLabels() { int32_t port) { std::shared_ptr targetSubscriber = std::make_shared(); APSARA_TEST_TRUE(targetSubscriber->Init(scrapeConfig)); - std::vector newScrapeSchedulerSet; + std::vector newScrapeSchedulerSet; APSARA_TEST_TRUE(targetSubscriber->ParseScrapeSchedulerGroup(targetResponse, newScrapeSchedulerSet)); APSARA_TEST_EQUAL(1UL, newScrapeSchedulerSet.size()); From cb0d2ea186793d58f632d23494b6df191082840d Mon Sep 17 00:00:00 2001 From: liqiang Date: Tue, 25 Feb 2025 08:18:28 +0000 Subject: [PATCH 42/48] chore: add ut --- .../TargetSubscriberSchedulerUnittest.cpp | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/core/unittest/prometheus/TargetSubscriberSchedulerUnittest.cpp b/core/unittest/prometheus/TargetSubscriberSchedulerUnittest.cpp index a06f023aeb..3be26fda97 100644 --- a/core/unittest/prometheus/TargetSubscriberSchedulerUnittest.cpp +++ b/core/unittest/prometheus/TargetSubscriberSchedulerUnittest.cpp @@ -579,6 +579,23 @@ void TargetSubscriberSchedulerUnittest::TestTargetLabels() { 80); } +void TargetSubscriberSchedulerUnittest::TestTargetsInfoToString() { + std::shared_ptr targetSubscriber = std::make_shared(); + auto metricLabels = MetricLabels(); + APSARA_TEST_TRUE(targetSubscriber->Init(mConfig["ScrapeConfig"])); + targetSubscriber->InitSelfMonitor(metricLabels); + // if status code is 200 + mHttpResponse.SetStatusCode(200); + targetSubscriber->OnSubscription(mHttpResponse, 0); + APSARA_TEST_EQUAL(3UL, targetSubscriber->mScrapeSchedulerMap.size()); + auto res = targetSubscriber->TargetsInfoToString(); + string errorMsg; + Json::Value data; + ParseJsonTable(res, data, errorMsg); + APSARA_TEST_EQUAL(2.0, data[prometheus::AGENT_INFO][prometheus::CPU_LIMIT].asFloat()); + APSARA_TEST_EQUAL((uint64_t)3, data[prometheus::TARGETS_INFO].size()); +} + UNIT_TEST_CASE(TargetSubscriberSchedulerUnittest, OnInitScrapeJobEvent) UNIT_TEST_CASE(TargetSubscriberSchedulerUnittest, TestProcess) UNIT_TEST_CASE(TargetSubscriberSchedulerUnittest, TestParseTargetGroups) From 690e0093b8c3923f6666157dc53730ae512e9c06 Mon Sep 17 00:00:00 2001 From: liqiang Date: Thu, 27 Feb 2025 06:55:40 +0000 Subject: [PATCH 43/48] chore: update --- .../processor/inner/ProcessorPromRelabelMetricNative.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/core/plugin/processor/inner/ProcessorPromRelabelMetricNative.cpp b/core/plugin/processor/inner/ProcessorPromRelabelMetricNative.cpp index ec7cfe3740..22fd09b010 100644 --- a/core/plugin/processor/inner/ProcessorPromRelabelMetricNative.cpp +++ b/core/plugin/processor/inner/ProcessorPromRelabelMetricNative.cpp @@ -173,10 +173,6 @@ void ProcessorPromRelabelMetricNative::AddAutoMetrics(PipelineEventGroup& eGroup return; } auto targetTags = eGroup.GetTags(); - auto toDelete = GetToDeleteTargetLabels(targetTags); - for (const auto& item : toDelete) { - targetTags.erase(item); - } if (!eGroup.HasMetadata(EventGroupMetaKey::PROMETHEUS_STREAM_ID)) { LOG_ERROR(sLogger, ("prometheus stream id", "")); return; From 2794943761fe4f64699d438876bbdcd980999e55 Mon Sep 17 00:00:00 2001 From: liqiang Date: Thu, 27 Feb 2025 10:07:25 +0000 Subject: [PATCH 44/48] feat: add http sink info --- core/monitor/Monitor.h | 5 ----- .../inner/ProcessorPromRelabelMetricNative.cpp | 2 +- core/prometheus/Constants.h | 2 ++ core/prometheus/PrometheusInputRunner.cpp | 15 ++++++++++----- core/prometheus/PrometheusInputRunner.h | 2 ++ .../schedulers/TargetSubscriberScheduler.cpp | 2 ++ 6 files changed, 17 insertions(+), 11 deletions(-) diff --git a/core/monitor/Monitor.h b/core/monitor/Monitor.h index e1ec00977c..6fff5c6508 100644 --- a/core/monitor/Monitor.h +++ b/core/monitor/Monitor.h @@ -93,8 +93,6 @@ class LogtailMonitor { // GetRealtimeCpuLevel return a value to indicates current CPU usage level. // LogInput use it to do flow control. float GetRealtimeCpuLevel() { return mRealtimeCpuStat.mCpuUsage / mScaledCpuUsageUpLimit; } - [[nodiscard]] float GetCpuUsage() const { return mCpuUsage.load(); } - [[nodiscard]] int64_t GetMemoryUsage() const { return mMemoryUsage.load(); } private: LogtailMonitor(); @@ -165,9 +163,6 @@ class LogtailMonitor { // Memory usage statistics. MemStat mMemStat; - std::atomic mCpuUsage = 0; - std::atomic mMemoryUsage = 0; - // Current scale up level, updated by CheckScaledCpuUsageUpLimit. float mScaledCpuUsageUpLimit; #if defined(__linux__) diff --git a/core/plugin/processor/inner/ProcessorPromRelabelMetricNative.cpp b/core/plugin/processor/inner/ProcessorPromRelabelMetricNative.cpp index 22fd09b010..56995708a0 100644 --- a/core/plugin/processor/inner/ProcessorPromRelabelMetricNative.cpp +++ b/core/plugin/processor/inner/ProcessorPromRelabelMetricNative.cpp @@ -168,11 +168,11 @@ void ProcessorPromRelabelMetricNative::UpdateAutoMetrics(const PipelineEventGrou void ProcessorPromRelabelMetricNative::AddAutoMetrics(PipelineEventGroup& eGroup, const prom::AutoMetric& autoMetric) const { + auto targetTags = eGroup.GetTags(); if (!eGroup.HasMetadata(EventGroupMetaKey::PROMETHEUS_SCRAPE_TIMESTAMP_MILLISEC)) { LOG_ERROR(sLogger, ("scrape_timestamp_milliseconds is not set", "")); return; } - auto targetTags = eGroup.GetTags(); if (!eGroup.HasMetadata(EventGroupMetaKey::PROMETHEUS_STREAM_ID)) { LOG_ERROR(sLogger, ("prometheus stream id", "")); return; diff --git a/core/prometheus/Constants.h b/core/prometheus/Constants.h index 2f2d9c7ead..ec9fb3a2af 100644 --- a/core/prometheus/Constants.h +++ b/core/prometheus/Constants.h @@ -128,6 +128,8 @@ const char* const CPU_LIMIT = "CpuLimit"; const char* const CPU_USAGE = "CpuUsage"; const char* const MEM_LIMIT = "MemLimit"; const char* const MEM_USAGE = "MemUsage"; +const char* const HTTP_SINK_OUT_SUCCESS = "HttpSinkOutSuccess"; +const char* const HTTP_SINK_OUT_FAILED = "HttpSinkOutFailed"; const char* const HASH = "Hash"; const char* const SIZE = "Size"; const char* const SCRAPE_DELAY_SECONDS = "ScrapeDelaySeconds"; diff --git a/core/prometheus/PrometheusInputRunner.cpp b/core/prometheus/PrometheusInputRunner.cpp index 101def32c3..5257c9f6fb 100644 --- a/core/prometheus/PrometheusInputRunner.cpp +++ b/core/prometheus/PrometheusInputRunner.cpp @@ -21,12 +21,12 @@ #include #include +#include "AppConfig.h" +#include "SelfMonitorMetricEvent.h" #include "application/Application.h" -#include "collection_pipeline/queue/ProcessQueueManager.h" #include "common/Flags.h" #include "common/JsonUtil.h" #include "common/StringTools.h" -#include "common/TimeUtil.h" #include "common/http/AsynCurlRunner.h" #include "common/http/Constant.h" #include "common/http/Curl.h" @@ -34,7 +34,6 @@ #include "logger/Logger.h" #include "monitor/Monitor.h" #include "monitor/metric_constants/MetricConstants.h" -#include "plugin/flusher/sls/FlusherSLS.h" #include "prometheus/Constants.h" #include "prometheus/Utils.h" @@ -300,9 +299,15 @@ void PrometheusInputRunner::CheckGC() { } void PrometheusInputRunner::GetAgentInfo(PromAgentInfo& agentInfo) { - agentInfo.mCpuUsage = LogtailMonitor::GetInstance()->GetCpuUsage(); - agentInfo.mMemUsage = LogtailMonitor::GetInstance()->GetMemoryUsage(); + SelfMonitorMetricEvent wantAgentEvent; + LoongCollectorMonitor::GetInstance()->GetAgentMetric(wantAgentEvent); + SelfMonitorMetricEvent wantRunnerEvent; + LoongCollectorMonitor::GetInstance()->GetRunnerMetric(METRIC_LABEL_VALUE_RUNNER_NAME_HTTP_SINK, wantRunnerEvent); + agentInfo.mCpuUsage = wantAgentEvent.GetGauge(METRIC_AGENT_CPU); + agentInfo.mMemUsage = wantAgentEvent.GetGauge(METRIC_AGENT_MEMORY); agentInfo.mCpuLimit = AppConfig::GetInstance()->GetCpuUsageUpLimit(); agentInfo.mMemLimit = AppConfig::GetInstance()->GetMemUsageUpLimit(); + agentInfo.mHttpSinkOutSuccess = wantRunnerEvent.GetCounter(METRIC_RUNNER_SINK_OUT_SUCCESSFUL_ITEMS_TOTAL); + agentInfo.mHttpSinkOutFailed = wantRunnerEvent.GetCounter(METRIC_RUNNER_SINK_OUT_FAILED_ITEMS_TOTAL); } }; // namespace logtail diff --git a/core/prometheus/PrometheusInputRunner.h b/core/prometheus/PrometheusInputRunner.h index b0b3488c32..8fe1a15ff4 100644 --- a/core/prometheus/PrometheusInputRunner.h +++ b/core/prometheus/PrometheusInputRunner.h @@ -35,6 +35,8 @@ struct PromAgentInfo { int64_t mMemUsage; float mCpuLimit; int64_t mMemLimit; + int64_t mHttpSinkOutSuccess; + int64_t mHttpSinkOutFailed; }; class PrometheusInputRunner : public InputRunner { diff --git a/core/prometheus/schedulers/TargetSubscriberScheduler.cpp b/core/prometheus/schedulers/TargetSubscriberScheduler.cpp index d32aa5ff34..3474b4ee0a 100644 --- a/core/prometheus/schedulers/TargetSubscriberScheduler.cpp +++ b/core/prometheus/schedulers/TargetSubscriberScheduler.cpp @@ -423,6 +423,8 @@ string TargetSubscriberScheduler::TargetsInfoToString() const { root[prometheus::AGENT_INFO][prometheus::CPU_LIMIT] = agentInfo.mCpuLimit; root[prometheus::AGENT_INFO][prometheus::MEM_USAGE] = agentInfo.mMemUsage; root[prometheus::AGENT_INFO][prometheus::MEM_LIMIT] = agentInfo.mMemLimit; + root[prometheus::AGENT_INFO][prometheus::HTTP_SINK_OUT_SUCCESS] = agentInfo.mHttpSinkOutSuccess; + root[prometheus::AGENT_INFO][prometheus::HTTP_SINK_OUT_FAILED] = agentInfo.mHttpSinkOutFailed; { ReadLock lock(mRWLock); for (const auto& [k, v] : mScrapeSchedulerMap) { From a9711a3d4c9e684c62edc838dcbf49347abb5ce8 Mon Sep 17 00:00:00 2001 From: liqiang Date: Thu, 27 Feb 2025 11:02:55 +0000 Subject: [PATCH 45/48] chore: update --- core/prometheus/PrometheusInputRunner.cpp | 16 ----------- core/prometheus/PrometheusInputRunner.h | 10 ------- .../schedulers/TargetSubscriberScheduler.cpp | 27 ++++++++++++------- 3 files changed, 17 insertions(+), 36 deletions(-) diff --git a/core/prometheus/PrometheusInputRunner.cpp b/core/prometheus/PrometheusInputRunner.cpp index 5257c9f6fb..419cf3602c 100644 --- a/core/prometheus/PrometheusInputRunner.cpp +++ b/core/prometheus/PrometheusInputRunner.cpp @@ -21,8 +21,6 @@ #include #include -#include "AppConfig.h" -#include "SelfMonitorMetricEvent.h" #include "application/Application.h" #include "common/Flags.h" #include "common/JsonUtil.h" @@ -32,7 +30,6 @@ #include "common/http/Curl.h" #include "common/timer/Timer.h" #include "logger/Logger.h" -#include "monitor/Monitor.h" #include "monitor/metric_constants/MetricConstants.h" #include "prometheus/Constants.h" #include "prometheus/Utils.h" @@ -297,17 +294,4 @@ string PrometheusInputRunner::GetAllProjects() { void PrometheusInputRunner::CheckGC() { mEventPool.CheckGC(); } - -void PrometheusInputRunner::GetAgentInfo(PromAgentInfo& agentInfo) { - SelfMonitorMetricEvent wantAgentEvent; - LoongCollectorMonitor::GetInstance()->GetAgentMetric(wantAgentEvent); - SelfMonitorMetricEvent wantRunnerEvent; - LoongCollectorMonitor::GetInstance()->GetRunnerMetric(METRIC_LABEL_VALUE_RUNNER_NAME_HTTP_SINK, wantRunnerEvent); - agentInfo.mCpuUsage = wantAgentEvent.GetGauge(METRIC_AGENT_CPU); - agentInfo.mMemUsage = wantAgentEvent.GetGauge(METRIC_AGENT_MEMORY); - agentInfo.mCpuLimit = AppConfig::GetInstance()->GetCpuUsageUpLimit(); - agentInfo.mMemLimit = AppConfig::GetInstance()->GetMemUsageUpLimit(); - agentInfo.mHttpSinkOutSuccess = wantRunnerEvent.GetCounter(METRIC_RUNNER_SINK_OUT_SUCCESSFUL_ITEMS_TOTAL); - agentInfo.mHttpSinkOutFailed = wantRunnerEvent.GetCounter(METRIC_RUNNER_SINK_OUT_FAILED_ITEMS_TOTAL); -} }; // namespace logtail diff --git a/core/prometheus/PrometheusInputRunner.h b/core/prometheus/PrometheusInputRunner.h index 8fe1a15ff4..26833e5b64 100644 --- a/core/prometheus/PrometheusInputRunner.h +++ b/core/prometheus/PrometheusInputRunner.h @@ -30,15 +30,6 @@ namespace logtail { -struct PromAgentInfo { - float mCpuUsage; - int64_t mMemUsage; - float mCpuLimit; - int64_t mMemLimit; - int64_t mHttpSinkOutSuccess; - int64_t mHttpSinkOutFailed; -}; - class PrometheusInputRunner : public InputRunner { public: PrometheusInputRunner(const PrometheusInputRunner&) = delete; @@ -51,7 +42,6 @@ class PrometheusInputRunner : public InputRunner { return &sInstance; } void CheckGC(); - void GetAgentInfo(PromAgentInfo& agentInfo); // input plugin update void UpdateScrapeInput(std::shared_ptr targetSubscriber, diff --git a/core/prometheus/schedulers/TargetSubscriberScheduler.cpp b/core/prometheus/schedulers/TargetSubscriberScheduler.cpp index 3474b4ee0a..441a4a0a62 100644 --- a/core/prometheus/schedulers/TargetSubscriberScheduler.cpp +++ b/core/prometheus/schedulers/TargetSubscriberScheduler.cpp @@ -22,15 +22,17 @@ #include #include +#include "AppConfig.h" +#include "SelfMonitorMetricEvent.h" #include "common/JsonUtil.h" #include "common/StringTools.h" #include "common/TimeUtil.h" #include "common/http/Constant.h" #include "common/timer/HttpRequestTimerEvent.h" #include "logger/Logger.h" +#include "monitor/Monitor.h" #include "monitor/metric_constants/MetricConstants.h" #include "prometheus/Constants.h" -#include "prometheus/PrometheusInputRunner.h" #include "prometheus/Utils.h" #include "prometheus/async/PromFuture.h" #include "prometheus/async/PromHttpRequest.h" @@ -416,15 +418,20 @@ TargetSubscriberScheduler::BuildSubscriberTimerEvent(std::chrono::steady_clock:: string TargetSubscriberScheduler::TargetsInfoToString() const { Json::Value root; - PromAgentInfo agentInfo{0.0F, 0, 0.0F, 0}; - - PrometheusInputRunner::GetInstance()->GetAgentInfo(agentInfo); - root[prometheus::AGENT_INFO][prometheus::CPU_USAGE] = agentInfo.mCpuUsage; - root[prometheus::AGENT_INFO][prometheus::CPU_LIMIT] = agentInfo.mCpuLimit; - root[prometheus::AGENT_INFO][prometheus::MEM_USAGE] = agentInfo.mMemUsage; - root[prometheus::AGENT_INFO][prometheus::MEM_LIMIT] = agentInfo.mMemLimit; - root[prometheus::AGENT_INFO][prometheus::HTTP_SINK_OUT_SUCCESS] = agentInfo.mHttpSinkOutSuccess; - root[prometheus::AGENT_INFO][prometheus::HTTP_SINK_OUT_FAILED] = agentInfo.mHttpSinkOutFailed; + + SelfMonitorMetricEvent wantAgentEvent; + LoongCollectorMonitor::GetInstance()->GetAgentMetric(wantAgentEvent); + SelfMonitorMetricEvent wantRunnerEvent; + LoongCollectorMonitor::GetInstance()->GetRunnerMetric(METRIC_LABEL_VALUE_RUNNER_NAME_HTTP_SINK, wantRunnerEvent); + + root[prometheus::AGENT_INFO][prometheus::CPU_USAGE] = wantAgentEvent.GetGauge(METRIC_AGENT_CPU); + root[prometheus::AGENT_INFO][prometheus::CPU_LIMIT] = AppConfig::GetInstance()->GetCpuUsageUpLimit(); + root[prometheus::AGENT_INFO][prometheus::MEM_USAGE] = wantAgentEvent.GetGauge(METRIC_AGENT_MEMORY); + root[prometheus::AGENT_INFO][prometheus::MEM_LIMIT] = AppConfig::GetInstance()->GetMemUsageUpLimit(); + root[prometheus::AGENT_INFO][prometheus::HTTP_SINK_OUT_SUCCESS] + = wantRunnerEvent.GetCounter(METRIC_RUNNER_SINK_OUT_SUCCESSFUL_ITEMS_TOTAL); + root[prometheus::AGENT_INFO][prometheus::HTTP_SINK_OUT_FAILED] + = wantRunnerEvent.GetCounter(METRIC_RUNNER_SINK_OUT_FAILED_ITEMS_TOTAL); { ReadLock lock(mRWLock); for (const auto& [k, v] : mScrapeSchedulerMap) { From baa25ae702eaf2ad8c5909625d4eb760689aeefb Mon Sep 17 00:00:00 2001 From: liqiang Date: Thu, 27 Feb 2025 11:15:29 +0000 Subject: [PATCH 46/48] chore: update --- core/prometheus/PrometheusInputRunner.cpp | 2 -- core/prometheus/PrometheusInputRunner.h | 2 -- 2 files changed, 4 deletions(-) diff --git a/core/prometheus/PrometheusInputRunner.cpp b/core/prometheus/PrometheusInputRunner.cpp index 40ec7bb0a6..ff130b7229 100644 --- a/core/prometheus/PrometheusInputRunner.cpp +++ b/core/prometheus/PrometheusInputRunner.cpp @@ -48,8 +48,6 @@ PrometheusInputRunner::PrometheusInputRunner() mPodName(STRING_FLAG(_pod_name_)), mEventPool(true), mUnRegisterMs(0) { - mLastUpdateTime = std::chrono::steady_clock::now(); - // self monitor MetricLabels labels; labels.emplace_back(METRIC_LABEL_KEY_RUNNER_NAME, METRIC_LABEL_VALUE_RUNNER_NAME_PROMETHEUS); diff --git a/core/prometheus/PrometheusInputRunner.h b/core/prometheus/PrometheusInputRunner.h index 1ad281b5b3..4519b37e39 100644 --- a/core/prometheus/PrometheusInputRunner.h +++ b/core/prometheus/PrometheusInputRunner.h @@ -74,8 +74,6 @@ class PrometheusInputRunner : public InputRunner { int32_t mServicePort; std::string mPodName; - std::chrono::steady_clock::time_point mLastUpdateTime; - EventPool mEventPool; mutable ReadWriteLock mSubscriberMapRWLock; From 2718e8e295b5018d41e70fa7f3aea70123690aa2 Mon Sep 17 00:00:00 2001 From: liqiang Date: Thu, 27 Feb 2025 11:18:20 +0000 Subject: [PATCH 47/48] chore: update --- core/prometheus/Constants.h | 2 +- core/prometheus/schedulers/TargetSubscriberScheduler.cpp | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/core/prometheus/Constants.h b/core/prometheus/Constants.h index ec9fb3a2af..a5de4dd345 100644 --- a/core/prometheus/Constants.h +++ b/core/prometheus/Constants.h @@ -128,7 +128,7 @@ const char* const CPU_LIMIT = "CpuLimit"; const char* const CPU_USAGE = "CpuUsage"; const char* const MEM_LIMIT = "MemLimit"; const char* const MEM_USAGE = "MemUsage"; -const char* const HTTP_SINK_OUT_SUCCESS = "HttpSinkOutSuccess"; +const char* const HTTP_SINK_IN_ITEMS_TOTAL = "HttpSinkInItemsTotal"; const char* const HTTP_SINK_OUT_FAILED = "HttpSinkOutFailed"; const char* const HASH = "Hash"; const char* const SIZE = "Size"; diff --git a/core/prometheus/schedulers/TargetSubscriberScheduler.cpp b/core/prometheus/schedulers/TargetSubscriberScheduler.cpp index f6bb0722a6..bc2dd31bac 100644 --- a/core/prometheus/schedulers/TargetSubscriberScheduler.cpp +++ b/core/prometheus/schedulers/TargetSubscriberScheduler.cpp @@ -424,8 +424,8 @@ string TargetSubscriberScheduler::TargetsInfoToString() const { root[prometheus::AGENT_INFO][prometheus::CPU_LIMIT] = AppConfig::GetInstance()->GetCpuUsageUpLimit(); root[prometheus::AGENT_INFO][prometheus::MEM_USAGE] = wantAgentEvent.GetGauge(METRIC_AGENT_MEMORY); root[prometheus::AGENT_INFO][prometheus::MEM_LIMIT] = AppConfig::GetInstance()->GetMemUsageUpLimit(); - root[prometheus::AGENT_INFO][prometheus::HTTP_SINK_OUT_SUCCESS] - = wantRunnerEvent.GetCounter(METRIC_RUNNER_SINK_OUT_SUCCESSFUL_ITEMS_TOTAL); + root[prometheus::AGENT_INFO][prometheus::HTTP_SINK_IN_ITEMS_TOTAL] + = wantRunnerEvent.GetCounter(METRIC_RUNNER_IN_ITEMS_TOTAL); root[prometheus::AGENT_INFO][prometheus::HTTP_SINK_OUT_FAILED] = wantRunnerEvent.GetCounter(METRIC_RUNNER_SINK_OUT_FAILED_ITEMS_TOTAL); { From a6435279aac62e5a9ba69b00f33b49761db2656f Mon Sep 17 00:00:00 2001 From: liqiang Date: Thu, 27 Feb 2025 12:42:12 +0000 Subject: [PATCH 48/48] chore: update --- .../inner/ProcessorPromRelabelMetricNative.cpp | 2 +- core/prometheus/Constants.h | 2 +- .../schedulers/TargetSubscriberScheduler.cpp | 12 ++++++------ 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/core/plugin/processor/inner/ProcessorPromRelabelMetricNative.cpp b/core/plugin/processor/inner/ProcessorPromRelabelMetricNative.cpp index 56995708a0..efe61f4ec1 100644 --- a/core/plugin/processor/inner/ProcessorPromRelabelMetricNative.cpp +++ b/core/plugin/processor/inner/ProcessorPromRelabelMetricNative.cpp @@ -188,7 +188,7 @@ void ProcessorPromRelabelMetricNative::AddAutoMetrics(PipelineEventGroup& eGroup eGroup, prometheus::SCRAPE_DURATION_SECONDS, autoMetric.mScrapeDurationSeconds, timestamp, nanoSec, targetTags); AddMetric(eGroup, - prometheus::SCRAPE_RESPONSE_SIZE_BYTES, + prometheus::SCRAPE_BODY_SIZE_BYTES, autoMetric.mScrapeResponseSizeBytes, timestamp, nanoSec, diff --git a/core/prometheus/Constants.h b/core/prometheus/Constants.h index a5de4dd345..f155ebcdf5 100644 --- a/core/prometheus/Constants.h +++ b/core/prometheus/Constants.h @@ -107,7 +107,7 @@ const char* const REBALANCE_MS = "rebalance_ms"; // auto metrics const char* const SCRAPE_STATE = "scrape_state"; const char* const SCRAPE_DURATION_SECONDS = "scrape_duration_seconds"; -const char* const SCRAPE_RESPONSE_SIZE_BYTES = "scrape_response_size_bytes"; +const char* const SCRAPE_BODY_SIZE_BYTES = "scrape_body_size_bytes"; const char* const SCRAPE_SAMPLES_LIMIT = "scrape_samples_limit"; const char* const SCRAPE_SAMPLES_POST_METRIC_RELABELING = "scrape_samples_post_metric_relabeling"; const char* const SCRAPE_SAMPLES_SCRAPED = "scrape_samples_scraped"; diff --git a/core/prometheus/schedulers/TargetSubscriberScheduler.cpp b/core/prometheus/schedulers/TargetSubscriberScheduler.cpp index bc2dd31bac..500880f48b 100644 --- a/core/prometheus/schedulers/TargetSubscriberScheduler.cpp +++ b/core/prometheus/schedulers/TargetSubscriberScheduler.cpp @@ -420,14 +420,14 @@ string TargetSubscriberScheduler::TargetsInfoToString() const { SelfMonitorMetricEvent wantRunnerEvent; LoongCollectorMonitor::GetInstance()->GetRunnerMetric(METRIC_LABEL_VALUE_RUNNER_NAME_HTTP_SINK, wantRunnerEvent); - root[prometheus::AGENT_INFO][prometheus::CPU_USAGE] = wantAgentEvent.GetGauge(METRIC_AGENT_CPU); - root[prometheus::AGENT_INFO][prometheus::CPU_LIMIT] = AppConfig::GetInstance()->GetCpuUsageUpLimit(); - root[prometheus::AGENT_INFO][prometheus::MEM_USAGE] = wantAgentEvent.GetGauge(METRIC_AGENT_MEMORY); - root[prometheus::AGENT_INFO][prometheus::MEM_LIMIT] = AppConfig::GetInstance()->GetMemUsageUpLimit(); + root[prometheus::AGENT_INFO][prometheus::CPU_USAGE] = wantAgentEvent.GetGauge(METRIC_AGENT_CPU); // double + root[prometheus::AGENT_INFO][prometheus::CPU_LIMIT] = AppConfig::GetInstance()->GetCpuUsageUpLimit(); // float + root[prometheus::AGENT_INFO][prometheus::MEM_USAGE] = wantAgentEvent.GetGauge(METRIC_AGENT_MEMORY); // double + root[prometheus::AGENT_INFO][prometheus::MEM_LIMIT] = AppConfig::GetInstance()->GetMemUsageUpLimit(); // int64_t root[prometheus::AGENT_INFO][prometheus::HTTP_SINK_IN_ITEMS_TOTAL] - = wantRunnerEvent.GetCounter(METRIC_RUNNER_IN_ITEMS_TOTAL); + = wantRunnerEvent.GetCounter(METRIC_RUNNER_IN_ITEMS_TOTAL); // uint64_t root[prometheus::AGENT_INFO][prometheus::HTTP_SINK_OUT_FAILED] - = wantRunnerEvent.GetCounter(METRIC_RUNNER_SINK_OUT_FAILED_ITEMS_TOTAL); + = wantRunnerEvent.GetCounter(METRIC_RUNNER_SINK_OUT_FAILED_ITEMS_TOTAL); // uint64_t { ReadLock lock(mRWLock); for (const auto& [k, v] : mScrapeSchedulerMap) {