diff --git a/core/CMakeLists.txt b/core/CMakeLists.txt index b034189144..30ed0d956a 100644 --- a/core/CMakeLists.txt +++ b/core/CMakeLists.txt @@ -114,7 +114,7 @@ set(SUB_DIRECTORIES_LIST batch application app_config checkpoint compression config config/feedbacker config/provider config/watcher config_manager config_server_pb/v1 config_server_pb/v2 container_manager controller event event_handler event_listener file_server go_pipeline log_pb logger models monitor parser pipeline plugin plugin/creator plugin/instance plugin/interface polling - profile_sender queue reader sdk sender serializer sls_control fuse + profile_sender queue reader sdk sender serializer sls_control fuse prometheus ) if (LINUX) if (ENABLE_ENTERPRISE) diff --git a/core/prometheus/TextParser.cpp b/core/prometheus/TextParser.cpp new file mode 100644 index 0000000000..560e56e978 --- /dev/null +++ b/core/prometheus/TextParser.cpp @@ -0,0 +1,155 @@ +/* + * Copyright 2024 iLogtail Authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "prometheus/TextParser.h" + +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "common/StringTools.h" +#include "logger/Logger.h" +#include "models/MetricEvent.h" + +using namespace std; + +namespace logtail { + +// TODO: 830移除正则依赖 +const std::string SAMPLE_RE = R"""(^(?P\w+)(\{(?P[^}]+)\})?\s+(?P\S+)(\s+(?P\S+))?)"""; +const string JOB = "job"; +const string INSTANCE = "instance"; + +PipelineEventGroup TextParser::Parse(const string& content) { + auto now = std::chrono::system_clock::now(); + auto duration_since_epoch = now.time_since_epoch(); + auto seconds_since_epoch = std::chrono::duration_cast(duration_since_epoch); + std::time_t defaultTsInSecs = seconds_since_epoch.count(); + return Parse(content, defaultTsInSecs, "", ""); +} + +// TODO: jobName和instance在后续移动到接近业务的位置 +PipelineEventGroup +TextParser::Parse(const string& content, const time_t defaultTsInSecs, const string& jobName, const string& instance) { + string line; + string argName, argLabels, argUnwrappedLabels, argValue, argSuffix, argTimestamp; + istringstream iss(content); + auto eGroup = PipelineEventGroup(make_shared()); + while (getline(iss, line)) { + // trim line + line = TrimString(line); + + // skip any empty line + if (line.empty()) { + continue; + } + + // skip any comment + if (line[0] == '#') { + continue; + } + + // parse line + // for given sample R"""(test_metric{k1="v1", k2="v2"} 9.9410452992e+10 1715829785083)""" + // argName = "test_metric" + // argLabels = R"""({"k1="v1", k2="v2"})""" + // argUnwrappedLabels = R"""(k1="v1", k2="v2")""" + // argValue = "9.9410452992e+10" + // argSuffix = " 1715829785083" + // argTimestamp = "1715829785083" + if (RE2::FullMatch(line, + mSampleRegex, + RE2::Arg(&argName), + RE2::Arg(&argLabels), + RE2::Arg(&argUnwrappedLabels), + RE2::Arg(&argValue), + RE2::Arg(&argSuffix), + RE2::Arg(&argTimestamp)) + == false) { + continue; + } + + // skip any sample that has no name + if (argName.empty()) { + continue; + } + + // skip any sample that has a NaN value + double value = 0; + try { + value = stod(argValue); + } catch (const exception&) { + LOG_WARNING(sLogger, ("invalid value", argValue)("raw line", line)); + continue; + } + if (isnan(value)) { + continue; + } + + // set timestamp to `defaultTsInSecs` if timestamp is empty, otherwise parse it + // if timestamp is not empty but not a valid integer, skip it + time_t timestamp; + if (argTimestamp.empty()) { + timestamp = defaultTsInSecs; + } else { + try { + // TODO: check if timestamp is out of window (e.g. 24h) + timestamp = stol(argTimestamp) / 1000; + // TODO: convert milli-second part into nano-second + } catch (const exception&) { + LOG_WARNING(sLogger, ("invalid value", argTimestamp)("raw line", line)); + continue; + } + } + + MetricEvent* e = eGroup.AddMetricEvent(); + e->SetName(argName); + e->SetTimestamp(timestamp); + e->SetValue(value); + + if (!argUnwrappedLabels.empty()) { + string kvPair; + istringstream iss(argUnwrappedLabels); + while (getline(iss, kvPair, ',')) { + kvPair = TrimString(kvPair); + + size_t equalsPos = kvPair.find('='); + if (equalsPos != string::npos) { + string key = kvPair.substr(0, equalsPos); + string value = kvPair.substr(equalsPos + 1); + value = TrimString(value, '\"', '\"'); + e->SetTag(key, value); + } + } + } + if (!jobName.empty()) { + e->SetTag(JOB, jobName); + } + if (!instance.empty()) { + e->SetTag(INSTANCE, instance); + } + } + + return eGroup; +} + +} // namespace logtail diff --git a/core/prometheus/TextParser.h b/core/prometheus/TextParser.h new file mode 100644 index 0000000000..20536628ea --- /dev/null +++ b/core/prometheus/TextParser.h @@ -0,0 +1,47 @@ +/* + * Copyright 2024 iLogtail Authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +#include + +#include "models/PipelineEventGroup.h" + +namespace logtail { + +extern const std::string SAMPLE_RE; + +class TextParser { +public: + TextParser() : mSampleRegex(SAMPLE_RE) {} + PipelineEventGroup Parse(const std::string& content); + + + PipelineEventGroup Parse(const std::string& content, + std::time_t defaultTs, + const std::string& jobName = "", + const std::string& instance = ""); +private: + RE2 mSampleRegex; + +#ifdef APSARA_UNIT_TEST_MAIN + friend class TextParserUnittest; +#endif +}; + +} // namespace logtail diff --git a/core/unittest/CMakeLists.txt b/core/unittest/CMakeLists.txt index 3c89794544..1ef6e9f1a2 100644 --- a/core/unittest/CMakeLists.txt +++ b/core/unittest/CMakeLists.txt @@ -49,6 +49,7 @@ macro(add_core_subdir) add_subdirectory(sdk) add_subdirectory(sender) add_subdirectory(serializer) + add_subdirectory(prometheus) if (LINUX) add_subdirectory(observer) endif () diff --git a/core/unittest/prometheus/CMakeLists.txt b/core/unittest/prometheus/CMakeLists.txt new file mode 100644 index 0000000000..f7ab86833c --- /dev/null +++ b/core/unittest/prometheus/CMakeLists.txt @@ -0,0 +1,23 @@ +# Copyright 2024 iLogtail Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +cmake_minimum_required(VERSION 3.22) +project(prometheus_unittest) + +add_executable(textparser_unittest TextParserUnittest.cpp) +target_link_libraries(textparser_unittest unittest_base) + +include(GoogleTest) + +gtest_discover_tests(textparser_unittest) diff --git a/core/unittest/prometheus/TextParserUnittest.cpp b/core/unittest/prometheus/TextParserUnittest.cpp new file mode 100644 index 0000000000..47cdedb8db --- /dev/null +++ b/core/unittest/prometheus/TextParserUnittest.cpp @@ -0,0 +1,153 @@ +/* + * Copyright 2024 iLogtail Authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include "MetricEvent.h" +#include "models/PipelineEventGroup.h" +#include "prometheus/TextParser.h" +#include "unittest/Unittest.h" + +using namespace std; + +namespace logtail { + +bool isDoubleEqual(double a, double b) { + return fabs(a - b) < 0.000001; +} + +class TextParserUnittest : public testing::Test { +public: + void TestMetricEvent() const; + void TestParseMultipleLines() const; + void TestSampleRegex() const; + void TestParseMetricWithTagsAndTimestamp() const; + void TestParseMetricWithManyTags() const; +}; + +void TextParserUnittest::TestMetricEvent() const { + const auto& srcBuf = make_shared(); + auto eGroup = PipelineEventGroup(srcBuf); + auto event = eGroup.AddMetricEvent(); + event->SetName("test_metric"); + event->SetValue(MetricValue(UntypedSingleValue{1.0})); + event->SetTimestamp(1234567890); + event->SetTag(StringView("test_key"), StringView("test_value")); + + const auto& events = eGroup.GetEvents(); + APSARA_TEST_EQUAL(1UL, events.size()); + const auto& firstEvent = &events.front(); + const auto& firstMetric = firstEvent->Get(); + APSARA_TEST_STREQ("test_metric", firstMetric->GetName().data()); + const auto& metricValue = firstMetric->GetValue(); + APSARA_TEST_EQUAL(1.0, metricValue->mValue); + APSARA_TEST_EQUAL(1234567890, firstMetric->GetTimestamp()); + APSARA_TEST_STREQ("test_value", firstMetric->GetTag(logtail::StringView("test_key")).data()); +} +UNIT_TEST_CASE(TextParserUnittest, TestMetricEvent) + +void TextParserUnittest::TestParseMultipleLines() const { + auto parser = TextParser(); + const auto eGroup = parser.Parse(R"""( +# begin + +test_metric1{k1="v1", k2="v 1.0 + test_metric2{k1="v1", k2="v2"} 2.0 1234567890 +test_metric3{k1="v1",k2="v2"} 9.9410452992e+10 + test_metric4{k1="v1",k2="v2"} 9.9410452992e+10 1715829785083 + test_metric5{k1="v1", k2="v2" } 9.9410452992e+10 1715829785083 +test_metric6{k1="v1",k2="v2",} 9.9410452992e+10 1715829785083 +test_metric7{k1="v1",k2="v2", } 9.9410452992e+10 1715829785083 +test_metric8{k1="v1", k2="v2", } 9.9410452992e+10 1715829785083 + +# end + )"""); + const auto& events = &eGroup.GetEvents(); + APSARA_TEST_EQUAL(7UL, events->size()); +} +UNIT_TEST_CASE(TextParserUnittest, TestParseMultipleLines) + +void TextParserUnittest::TestParseMetricWithTagsAndTimestamp() const { + auto parser = TextParser(); + const auto eGroup = parser.Parse(R"""( + test_metric{k1="v1", k2="v2"} 9.9410452992e+10 1715829785083 + test_metric2{k1="v1", k2="v2"} 2.0 1715829785083 + test_metric3{k1="v1",k2="v2"} 4.2 1715829785083 + )"""); + + // test_metric + const auto& events = &eGroup.GetEvents(); + APSARA_TEST_EQUAL(3UL, events->size()); + const auto& event = events->front(); + const auto& metric = event.Get(); + APSARA_TEST_STREQ("test_metric", metric->GetName().data()); + APSARA_TEST_EQUAL(1715829785, metric->GetTimestamp()); + APSARA_TEST_TRUE(isDoubleEqual(9.9410452992e+10, metric->GetValue()->mValue)); + APSARA_TEST_STREQ("v1", metric->GetTag("k1").data()); + APSARA_TEST_STREQ("v2", metric->GetTag("k2").data()); + + // test_metric2 + auto& event2 = events->at(1); + const auto& metric2 = event2.Get(); + APSARA_TEST_STREQ("test_metric2", metric2->GetName().data()); + APSARA_TEST_EQUAL(1715829785, metric2->GetTimestamp()); + APSARA_TEST_TRUE(isDoubleEqual(2.0, metric2->GetValue()->mValue)); + APSARA_TEST_STREQ("v1", metric2->GetTag("k1").data()); + APSARA_TEST_STREQ("v2", metric2->GetTag("k2").data()); + + // test_metric3 + auto& event3 = events->at(2); + const auto& metric3 = event3.Get(); + APSARA_TEST_STREQ("test_metric3", metric3->GetName().data()); + APSARA_TEST_EQUAL(1715829785, metric3->GetTimestamp()); + APSARA_TEST_TRUE(isDoubleEqual(4.2, metric3->GetValue()->mValue)); + APSARA_TEST_STREQ("v1", metric3->GetTag("k1").data()); + APSARA_TEST_STREQ("v2", metric3->GetTag("k2").data()); +} +UNIT_TEST_CASE(TextParserUnittest, TestParseMetricWithTagsAndTimestamp) + +void TextParserUnittest::TestParseMetricWithManyTags() const { + auto parser = TextParser(); + const auto eGroup = parser.Parse( + R"""(container_blkio_device_usage_total{container="",device="/dev/nvme0n1",id="/",image="",major="259",minor="0",name="",namespace="",operation="Async",pod=""} 9.9410452992e+10 1715829785083)""", + 1715829785083, + "test_job", + "test_instance"); + const auto& events = &eGroup.GetEvents(); + APSARA_TEST_EQUAL(1UL, events->size()); + const auto& event = events->front(); + const auto& metric = event.Get(); + APSARA_TEST_STREQ("container_blkio_device_usage_total", metric->GetName().data()); + APSARA_TEST_EQUAL(1715829785, metric->GetTimestamp()); + APSARA_TEST_TRUE(isDoubleEqual(9.9410452992e+10, metric->GetValue()->mValue)); + + // TODO: assert number of tags + APSARA_TEST_STREQ("", metric->GetTag("container").data()); + APSARA_TEST_STREQ("/dev/nvme0n1", metric->GetTag("device").data()); + APSARA_TEST_STREQ("/", metric->GetTag("id").data()); + APSARA_TEST_STREQ("", metric->GetTag("image").data()); + APSARA_TEST_STREQ("259", metric->GetTag("major").data()); + APSARA_TEST_STREQ("0", metric->GetTag("minor").data()); + APSARA_TEST_STREQ("", metric->GetTag("name").data()); + APSARA_TEST_STREQ("", metric->GetTag("namespace").data()); + APSARA_TEST_STREQ("Async", metric->GetTag("operation").data()); + APSARA_TEST_STREQ("", metric->GetTag("pod").data()); +} +UNIT_TEST_CASE(TextParserUnittest, TestParseMetricWithManyTags) + +} // namespace logtail + +UNIT_TEST_MAIN