Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add scrape state metrics #1900

Merged
merged 31 commits into from
Dec 16, 2024
Merged
Show file tree
Hide file tree
Changes from 11 commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
7bcec59
feat: add scrape state metrics
catdogpandas Nov 19, 2024
0cd4d62
feat: update
catdogpandas Nov 20, 2024
a15a57c
update
catdogpandas Nov 20, 2024
ca53f80
update
catdogpandas Nov 20, 2024
afe17fc
update
catdogpandas Nov 20, 2024
a59b189
update
catdogpandas Nov 20, 2024
12773ce
update
catdogpandas Nov 20, 2024
59efed5
chore: update ut
catdogpandas Nov 20, 2024
e2c84d6
chore: add ut
catdogpandas Nov 20, 2024
0c6b002
update
catdogpandas Nov 25, 2024
acfe0b2
update
catdogpandas Nov 25, 2024
1d42ba6
feat: change to autometric
catdogpandas Nov 28, 2024
a5586d9
update
catdogpandas Nov 28, 2024
0eff5b3
update
catdogpandas Nov 28, 2024
562a1a4
update
catdogpandas Nov 28, 2024
d9e6b7a
chore: add enable_scrape_state
catdogpandas Nov 28, 2024
0028a72
update
catdogpandas Nov 29, 2024
f06b4e0
Merge branch 'main' into feat/prom-curl-err-msg
catdogpandas Nov 29, 2024
8fd8b12
feat: refactor scrape_state
catdogpandas Nov 29, 2024
81a1d8e
update
catdogpandas Nov 29, 2024
a4d011a
update
catdogpandas Nov 29, 2024
0b16b06
update
catdogpandas Nov 29, 2024
1db2b6a
update
catdogpandas Nov 29, 2024
04049ae
chore: remove enable_scrape_state
catdogpandas Dec 2, 2024
e4a9fa1
chore: add ut
catdogpandas Dec 2, 2024
c0e1ed8
Merge branch 'main' into feat/prom-curl-err-msg
catdogpandas Dec 2, 2024
3bca53b
chore: add HttpCodeToState ERR_HTTP_xxx
catdogpandas Dec 3, 2024
448719f
update
catdogpandas Dec 3, 2024
6e35124
chore: update code style
catdogpandas Dec 3, 2024
0d7b61b
Merge branch 'main' into feat/prom-curl-err-msg
catdogpandas Dec 3, 2024
3fc9223
Merge branch 'main' into feat/prom-curl-err-msg
catdogpandas Dec 16, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 5 additions & 3 deletions core/common/http/AsynCurlRunner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ bool AsynCurlRunner::AddRequestToClient(unique_ptr<AsynHttpRequest>&& request) {
AppConfig::GetInstance()->GetBindInterface());
if (curl == nullptr) {
LOG_ERROR(sLogger, ("failed to send request", "failed to init curl handler")("request address", request.get()));
request->mResponse.SetCurlCode(CURLE_FAILED_INIT);
request->OnSendDone(request->mResponse);
return false;
}
Expand All @@ -103,6 +104,7 @@ bool AsynCurlRunner::AddRequestToClient(unique_ptr<AsynHttpRequest>&& request) {
LOG_ERROR(sLogger,
("failed to send request", "failed to add the easy curl handle to multi_handle")(
"errMsg", curl_multi_strerror(res))("request address", request.get()));
request->mResponse.SetCurlCode(CURLE_FAILED_INIT);
request->OnSendDone(request->mResponse);
curl_easy_cleanup(curl);
return false;
Expand Down Expand Up @@ -135,9 +137,7 @@ void AsynCurlRunner::DoRun() {
}
}

struct timeval timeout {
1, 0
};
struct timeval timeout{1, 0};
long curlTimeout = -1;
if ((mc = curl_multi_timeout(mClient, &curlTimeout)) != CURLM_OK) {
LOG_WARNING(
Expand Down Expand Up @@ -189,6 +189,7 @@ void AsynCurlRunner::HandleCompletedRequests(int& runningHandlers) {
case CURLE_OK: {
long statusCode = 0;
curl_easy_getinfo(handler, CURLINFO_RESPONSE_CODE, &statusCode);
request->mResponse.SetCurlCode(CURLE_OK);
request->mResponse.SetStatusCode(statusCode);
request->OnSendDone(request->mResponse);
LOG_DEBUG(sLogger,
Expand All @@ -213,6 +214,7 @@ void AsynCurlRunner::HandleCompletedRequests(int& runningHandlers) {
++runningHandlers;
requestReused = true;
} else {
request->mResponse.SetCurlCode(msg->data.result);
request->OnSendDone(request->mResponse);
LOG_DEBUG(
sLogger,
Expand Down
3 changes: 3 additions & 0 deletions core/common/http/HttpResponse.h
Original file line number Diff line number Diff line change
Expand Up @@ -71,9 +71,12 @@ class HttpResponse {
}

void SetStatusCode(int32_t code) { mStatusCode = code; }
void SetCurlCode(int32_t code) { mCurlCode = code; }
int32_t GetCurlCode() { return mCurlCode; }
catdogpandas marked this conversation as resolved.
Show resolved Hide resolved

private:
int32_t mStatusCode = 0; // 0 means no response from server
int32_t mCurlCode = 0; // 0 means no error
std::map<std::string, std::string, decltype(compareHeader)*> mHeader;
std::unique_ptr<void, std::function<void(void*)>> mBody;
size_t (*mWriteCallback)(char*, size_t, size_t, void*) = nullptr;
Expand Down
1 change: 1 addition & 0 deletions core/monitor/metric_constants/MetricConstants.h
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ extern const std::string METRIC_PLUGIN_PROM_SUBSCRIBE_TARGETS;
extern const std::string METRIC_PLUGIN_PROM_SUBSCRIBE_TOTAL;
extern const std::string METRIC_PLUGIN_PROM_SUBSCRIBE_TIME_MS;
extern const std::string METRIC_PLUGIN_PROM_SCRAPE_TIME_MS;
extern const std::string METRIC_PLUGIN_PROM_SCRAPE_STATE;
extern const std::string METRIC_PLUGIN_PROM_SCRAPE_DELAY_TOTAL;

/**********************************************************
Expand Down
1 change: 1 addition & 0 deletions core/monitor/metric_constants/PluginMetrics.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ const std::string METRIC_PLUGIN_PROM_SUBSCRIBE_TARGETS = "prom_subscribe_targets
const std::string METRIC_PLUGIN_PROM_SUBSCRIBE_TOTAL = "prom_subscribe_total";
const std::string METRIC_PLUGIN_PROM_SUBSCRIBE_TIME_MS = "prom_subscribe_time_ms";
const std::string METRIC_PLUGIN_PROM_SCRAPE_TIME_MS = "prom_scrape_time_ms";
const std::string METRIC_PLUGIN_PROM_SCRAPE_STATE = "prom_scrape_state";
catdogpandas marked this conversation as resolved.
Show resolved Hide resolved
const std::string METRIC_PLUGIN_PROM_SCRAPE_DELAY_TOTAL = "prom_scrape_delay_total";

/**********************************************************
Expand Down
153 changes: 126 additions & 27 deletions core/prometheus/PromSelfMonitor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,28 +4,28 @@
#include <string>
#include <unordered_map>

#include "common/StringTools.h"
#include "monitor/MetricTypes.h"
#include "monitor/metric_constants/MetricConstants.h"
using namespace std;

namespace logtail {

void PromSelfMonitorUnsafe::InitMetricManager(const std::unordered_map<std::string, MetricType>& metricKeys,
const MetricLabels& labels) {
const MetricLabels& labels) {
auto metricLabels = std::make_shared<MetricLabels>(labels);
mPluginMetricManagerPtr = std::make_shared<PluginMetricManager>(metricLabels, metricKeys, MetricCategory::METRIC_CATEGORY_PLUGIN_SOURCE);
mPluginMetricManagerPtr = std::make_shared<PluginMetricManager>(
metricLabels, metricKeys, MetricCategory::METRIC_CATEGORY_PLUGIN_SOURCE);
}

void PromSelfMonitorUnsafe::AddCounter(const std::string& metricName, uint64_t statusCode, uint64_t val) {
auto& status = StatusToString(statusCode);
void PromSelfMonitorUnsafe::AddCounter(const std::string& metricName, const string& status, uint64_t val) {
if (!mMetricsCounterMap.count(metricName) || !mMetricsCounterMap[metricName].count(status)) {
mMetricsCounterMap[metricName][status] = GetOrCreateReentrantMetricsRecordRef(status)->GetCounter(metricName);
}
mMetricsCounterMap[metricName][status]->Add(val);
}

void PromSelfMonitorUnsafe::SetIntGauge(const std::string& metricName, uint64_t statusCode, uint64_t value) {
auto& status = StatusToString(statusCode);
void PromSelfMonitorUnsafe::SetIntGauge(const std::string& metricName, const string& status, uint64_t value) {
if (!mMetricsIntGaugeMap.count(metricName) || !mMetricsIntGaugeMap[metricName].count(status)) {
mMetricsIntGaugeMap[metricName][status] = GetOrCreateReentrantMetricsRecordRef(status)->GetIntGauge(metricName);
}
Expand All @@ -43,29 +43,128 @@ ReentrantMetricsRecordRef PromSelfMonitorUnsafe::GetOrCreateReentrantMetricsReco
return mPromStatusMap[status];
}

std::string& PromSelfMonitorUnsafe::StatusToString(uint64_t status) {
static string sHttp0XX = "0XX";
static string sHttp1XX = "1XX";
static string sHttp2XX = "2XX";
static string sHttp3XX = "3XX";
static string sHttp4XX = "4XX";
static string sHttp5XX = "5XX";
std::string PromSelfMonitorUnsafe::StatusToString(uint64_t status) {
static string sHttpOther = "other";
if (status < 100) {
return sHttp0XX;
} else if (status < 200) {
return sHttp1XX;
} else if (status < 300) {
return sHttp2XX;
} else if (status < 400) {
return sHttp3XX;
} else if (status < 500) {
return sHttp4XX;
} else if (status < 500) {
return sHttp5XX;
} else {
return sHttpOther;
if (status < 600) {
catdogpandas marked this conversation as resolved.
Show resolved Hide resolved
return ToString(status);
}
return sHttpOther;
}

// Translates a numeric curl result code into a short state label.
//
// @param code  numeric curl result code (0 means no error).
// @return      the dedicated label for the code, or "ERR_UNKNOWN" when the
//              code has no dedicated label.
std::string PromSelfMonitorUnsafe::CurlCodeToString(uint64_t code) {
    // Immutable lookup table: being const, a lookup can never insert an entry.
    static const std::map<uint64_t, std::string> sCurlCodeMap = {
        {0, "OK"}, // no error
        {7, "ERR_CONN_REFUSED"}, // couldn't connect to server
        {9, "ERR_ACCESS_DENIED"}, // access denied to remote resource
        {28, "ERR_TIMEOUT"}, // timeout was reached
        {35, "ERR_SSL_CONN_ERR"}, // SSL connect error
        {51, "ERR_SSL_CERT_ERR"}, // SSL peer certificate or SSH remote key was not OK
        {52, "ERR_SERVER_RESPONSE_NONE"}, // server returned nothing (no headers, no data)
        {55, "ERR_SEND_DATA_FAILED"}, // failed sending data to the peer
        {56, "ERR_RECV_DATA_FAILED"}, // failure when receiving data from the peer
    };
    static const std::string sCurlOther = "ERR_UNKNOWN";

    // Single lookup instead of count() followed by operator[].
    const auto it = sCurlCodeMap.find(code);
    return it != sCurlCodeMap.end() ? it->second : sCurlOther;
}

// curl error codes in use (each has a dedicated label in CurlCodeToString):
// 7 Couldn't connect to server
// 9 Access denied to remote resource
// 28 Timeout was reached
// 35 SSL connect error
// 51 SSL peer certificate or SSH remote key was not OK
// 52 Server returned nothing (no headers, no data)
// 55 Failed sending data to the peer
// 56 Failure when receiving data from the peer

// unused (reported as ERR_UNKNOWN; code 0 is the exception and is reported as OK)
// 0 No error
// 1 Unsupported protocol
// 2 Failed initialization
// 3 URL using bad/illegal format or missing URL
// 4 A requested feature, protocol or option was not found built-in in this libcurl due to a build-time decision.
// 5 Couldn't resolve proxy name
// 6 Couldn't resolve host name
// 8 Weird server reply
// 10 FTP: The server failed to connect to data port
// 11 FTP: unknown PASS reply
// 12 FTP: Accepting server connect has timed out
// 13 FTP: unknown PASV reply
// 14 FTP: unknown 227 response format
// 15 FTP: can't figure out the host in the PASV response
// 16 Error in the HTTP2 framing layer
// 17 FTP: couldn't set file type
// 18 Transferred a partial file
// 19 FTP: couldn't retrieve (RETR failed) the specified file
// 20 Unknown error
// 21 Quote command returned error
// 22 HTTP response code said error
// 23 Failed writing received data to disk/application
// 24 Unknown error
// 25 Upload failed (at start/before it took off)
// 26 Failed to open/read local data from file/application
// 27 Out of memory
// 29 Unknown error
// 30 FTP: command PORT failed
// 31 FTP: command REST failed
// 32 Unknown error
// 33 Requested range was not delivered by the server
// 34 Internal problem setting up the POST
// 36 Couldn't resume download
// 37 Couldn't read a file:// file
// 38 LDAP: cannot bind
// 39 LDAP: search failed
// 40 Unknown error
// 41 A required function in the library was not found
// 42 Operation was aborted by an application callback
// 43 A libcurl function was given a bad argument
// 44 Unknown error
// 45 Failed binding local connection end
// 46 Unknown error
// 47 Number of redirects hit maximum amount
// 48 An unknown option was passed in to libcurl
// 49 Malformed telnet option
// 50 Unknown error
// 53 SSL crypto engine not found
// 54 Can not set SSL crypto engine as default
// 57 Unknown error
// 58 Problem with the local SSL certificate
// 59 Couldn't use specified SSL cipher
// 60 Peer certificate cannot be authenticated with given CA certificates
// 61 Unrecognized or bad HTTP Content or Transfer-Encoding
// 62 Invalid LDAP URL
// 63 Maximum file size exceeded
// 64 Requested SSL level failed
// 65 Send failed since rewinding of the data stream failed
// 66 Failed to initialise SSL crypto engine
// 67 Login denied
// 68 TFTP: File Not Found
// 69 TFTP: Access Violation
// 70 Disk full or allocation exceeded
// 71 TFTP: Illegal operation
// 72 TFTP: Unknown transfer ID
// 73 Remote file already exists
// 74 TFTP: No such user
// 75 Conversion failed
// 76 Caller must register CURLOPT_CONV_ callback options
// 77 Problem with the SSL CA cert (path? access rights?)
// 78 Remote file not found
// 79 Error in the SSH layer
// 80 Failed to shut down the SSL connection
// 81 Socket not ready for send/recv
// 82 Failed to load CRL file (path? access rights?, format?)
// 83 Issuer check against peer certificate failed
// 84 FTP: The server did not accept the PRET command.
// 85 RTSP CSeq mismatch or invalid CSeq
// 86 RTSP session error
// 87 Unable to parse FTP file list
// 88 Chunk callback failed
// 89 The max connection limit is reached
// 90 SSL public key does not match pinned public key
// 91 SSL server certificate status verification FAILED
// 92 Stream error in the HTTP/2 framing layer
// 93 API function called from within callback
// 94 Unknown error

} // namespace logtail
10 changes: 6 additions & 4 deletions core/prometheus/PromSelfMonitor.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
#include <string>
#include <unordered_map>

#include "monitor/MetricManager.h"
#include "monitor/PluginMetricManager.h"

namespace logtail {
Expand All @@ -16,13 +15,16 @@ class PromSelfMonitorUnsafe {

void InitMetricManager(const std::unordered_map<std::string, MetricType>& metricKeys, const MetricLabels& labels);

void AddCounter(const std::string& metricName, uint64_t status, uint64_t val = 1);
void AddCounter(const std::string& metricName, const std::string& status, uint64_t val = 1);

void SetIntGauge(const std::string& metricName, uint64_t status, uint64_t value);
void SetIntGauge(const std::string& metricName, const std::string& status, uint64_t value);

static std::string StatusToString(uint64_t status);
static std::string CurlCodeToString(uint64_t code);

private:
ReentrantMetricsRecordRef GetOrCreateReentrantMetricsRecordRef(const std::string& status);
std::string& StatusToString(uint64_t status);


PluginMetricManagerPtr mPluginMetricManagerPtr;
std::map<std::string, ReentrantMetricsRecordRef> mPromStatusMap;
Expand Down
33 changes: 24 additions & 9 deletions core/prometheus/schedulers/ScrapeScheduler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -93,11 +93,25 @@ ScrapeScheduler::ScrapeScheduler(std::shared_ptr<ScrapeConfig> scrapeConfigPtr,
void ScrapeScheduler::OnMetricResult(HttpResponse& response, uint64_t timestampMilliSec) {
auto& responseBody = *response.GetBody<PromMetricResponseBody>();
responseBody.FlushCache();
mSelfMonitor->AddCounter(METRIC_PLUGIN_OUT_EVENTS_TOTAL, response.GetStatusCode());
mSelfMonitor->AddCounter(METRIC_PLUGIN_OUT_SIZE_BYTES, response.GetStatusCode(), responseBody.mRawSize);
mSelfMonitor->AddCounter(METRIC_PLUGIN_OUT_EVENTS_TOTAL,
PromSelfMonitorUnsafe::StatusToString(response.GetStatusCode()));
mSelfMonitor->AddCounter(METRIC_PLUGIN_OUT_SIZE_BYTES,
PromSelfMonitorUnsafe::StatusToString(response.GetStatusCode()),
responseBody.mRawSize);
mSelfMonitor->AddCounter(METRIC_PLUGIN_PROM_SCRAPE_TIME_MS,
response.GetStatusCode(),
PromSelfMonitorUnsafe::StatusToString(response.GetStatusCode()),
GetCurrentTimeInMilliSeconds() - timestampMilliSec);
if (response.GetCurlCode() != 0) {
// not 0 means curl error
mSelfMonitor->AddCounter(METRIC_PLUGIN_PROM_SCRAPE_STATE,
PromSelfMonitorUnsafe::CurlCodeToString(response.GetCurlCode()));
} else if (response.GetStatusCode() != 200) {
catdogpandas marked this conversation as resolved.
Show resolved Hide resolved
mSelfMonitor->AddCounter(METRIC_PLUGIN_PROM_SCRAPE_STATE,
PromSelfMonitorUnsafe::StatusToString(response.GetStatusCode()));
} else {
// 0 means success
mSelfMonitor->AddCounter(METRIC_PLUGIN_PROM_SCRAPE_STATE, PromSelfMonitorUnsafe::CurlCodeToString(0));
}

mScrapeTimestampMilliSec = timestampMilliSec;
mScrapeDurationSeconds = 1.0 * (GetCurrentTimeInMilliSeconds() - timestampMilliSec) / 1000;
Expand Down Expand Up @@ -247,15 +261,16 @@ void ScrapeScheduler::InitSelfMonitor(const MetricLabels& defaultLabels) {
MetricLabels labels = defaultLabels;
labels.emplace_back(METRIC_LABEL_KEY_INSTANCE, mInstance);

static const std::unordered_map<std::string, MetricType> sScrapeMetricKeys = {
{METRIC_PLUGIN_OUT_EVENTS_TOTAL, MetricType::METRIC_TYPE_COUNTER},
{METRIC_PLUGIN_OUT_SIZE_BYTES, MetricType::METRIC_TYPE_COUNTER},
{METRIC_PLUGIN_PROM_SCRAPE_TIME_MS, MetricType::METRIC_TYPE_COUNTER},
};
static const std::unordered_map<std::string, MetricType> sScrapeMetricKeys
= {{METRIC_PLUGIN_OUT_EVENTS_TOTAL, MetricType::METRIC_TYPE_COUNTER},
{METRIC_PLUGIN_OUT_SIZE_BYTES, MetricType::METRIC_TYPE_COUNTER},
{METRIC_PLUGIN_PROM_SCRAPE_TIME_MS, MetricType::METRIC_TYPE_COUNTER},
{METRIC_PLUGIN_PROM_SCRAPE_STATE, MetricType::METRIC_TYPE_COUNTER}};

mSelfMonitor->InitMetricManager(sScrapeMetricKeys, labels);

WriteMetrics::GetInstance()->PrepareMetricsRecordRef(mMetricsRecordRef, MetricCategory::METRIC_CATEGORY_PLUGIN_SOURCE, std::move(labels));
WriteMetrics::GetInstance()->PrepareMetricsRecordRef(
mMetricsRecordRef, MetricCategory::METRIC_CATEGORY_PLUGIN_SOURCE, std::move(labels));
mPromDelayTotal = mMetricsRecordRef.CreateCounter(METRIC_PLUGIN_PROM_SCRAPE_DELAY_TOTAL);
mPluginTotalDelayMs = mMetricsRecordRef.CreateCounter(METRIC_PLUGIN_TOTAL_DELAY_MS);
}
Expand Down
8 changes: 5 additions & 3 deletions core/prometheus/schedulers/TargetSubscriberScheduler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -58,9 +58,10 @@ bool TargetSubscriberScheduler::operator<(const TargetSubscriberScheduler& other
}

void TargetSubscriberScheduler::OnSubscription(HttpResponse& response, uint64_t timestampMilliSec) {
mSelfMonitor->AddCounter(METRIC_PLUGIN_PROM_SUBSCRIBE_TOTAL, response.GetStatusCode());
mSelfMonitor->AddCounter(METRIC_PLUGIN_PROM_SUBSCRIBE_TOTAL,
PromSelfMonitorUnsafe::StatusToString(response.GetStatusCode()));
mSelfMonitor->AddCounter(METRIC_PLUGIN_PROM_SUBSCRIBE_TIME_MS,
response.GetStatusCode(),
PromSelfMonitorUnsafe::StatusToString(response.GetStatusCode()),
GetCurrentTimeInMilliSeconds() - timestampMilliSec);
if (response.GetStatusCode() == 304) {
// not modified
Expand Down Expand Up @@ -336,7 +337,8 @@ void TargetSubscriberScheduler::InitSelfMonitor(const MetricLabels& defaultLabel
mSelfMonitor = std::make_shared<PromSelfMonitorUnsafe>();
mSelfMonitor->InitMetricManager(sSubscriberMetricKeys, mDefaultLabels);

WriteMetrics::GetInstance()->PrepareMetricsRecordRef(mMetricsRecordRef, MetricCategory::METRIC_CATEGORY_PLUGIN_SOURCE, std::move(mDefaultLabels));
WriteMetrics::GetInstance()->PrepareMetricsRecordRef(
mMetricsRecordRef, MetricCategory::METRIC_CATEGORY_PLUGIN_SOURCE, std::move(mDefaultLabels));
mPromSubscriberTargets = mMetricsRecordRef.CreateIntGauge(METRIC_PLUGIN_PROM_SUBSCRIBE_TARGETS);
mTotalDelayMs = mMetricsRecordRef.CreateCounter(METRIC_PLUGIN_TOTAL_DELAY_MS);
}
Expand Down
Loading
Loading