Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add prometheus text parser lib #1608

Merged
merged 9 commits into from
Jul 22, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion core/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ set(SUB_DIRECTORIES_LIST
batch application app_config checkpoint compression config config/provider config/watcher config_manager config_server_pb
container_manager controller event event_handler event_listener file_server go_pipeline log_pb logger
models monitor parser pipeline plugin plugin/creator plugin/instance plugin/interface polling
profile_sender queue reader sdk sender serializer sls_control fuse
profile_sender queue reader sdk sender serializer sls_control fuse prometheus
)
if (LINUX)
if (ENABLE_ENTERPRISE)
Expand Down
150 changes: 150 additions & 0 deletions core/prometheus/TextParser.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
/*
* Copyright 2024 iLogtail Authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "prometheus/TextParser.h"

#include <boost/algorithm/string.hpp>
#include <chrono>
#include <cmath>
#include <sstream>
#include <string>

#include "logger/Logger.h"
#include "models/MetricEvent.h"
#include "re2/re2.h"

using namespace std;

namespace logtail {

// RE2 pattern for one sample line of the Prometheus text exposition format.
// It exposes exactly six capture groups, in this order (TextParser::Parse binds them positionally):
//   1. name       - metric name, [a-zA-Z_:][a-zA-Z0-9_:]* (colons are legal, e.g. in recording rules)
//   2. (unnamed)  - the optional label block including its braces
//   3. labels     - the label block without braces; may be empty for `{}`
//   4. value      - the sample value token
//   5. (unnamed)  - the optional timestamp including its leading whitespace
//   6. timestamp  - the optional timestamp token
const std::string SAMPLE_RE
    = R"""(^(?P<name>[a-zA-Z_:][a-zA-Z0-9_:]*)(\{(?P<labels>[^}]*)\})?\s+(?P<value>\S+)(\s+(?P<timestamp>\S+))?)""";

// Convenience overload: parses `content` using the current system_clock time, truncated to whole
// seconds since the clock's epoch, as the default timestamp, with empty job and instance names.
PipelineEventGroup TextParser::Parse(const string& content) {
    const auto sinceEpoch = std::chrono::system_clock::now().time_since_epoch();
    const std::time_t nowInSecs = std::chrono::duration_cast<std::chrono::seconds>(sinceEpoch).count();
    return Parse(content, nowInSecs, "", "");
}

// Parses Prometheus text-format `content` line by line into a PipelineEventGroup of metric events.
//
// content         - the scraped text; each line is trimmed before being examined
// defaultTsInSecs - timestamp (seconds) assigned to samples that carry no timestamp of their own
// jobName         - when non-empty, added to every event as the "job" tag
// instance        - when non-empty, added to every event as the "instance" tag
//
// A line is skipped (no event is produced) when it is empty, starts with '#', does not match
// mSampleRegex, has a value that is not a number or is NaN, or has a timestamp that is not an integer.
//
// NOTE(review): the returned group is backed by a freshly created SourceBuffer, not by mSourceBuffer
// - confirm this is intended.
PipelineEventGroup
TextParser::Parse(const string& content, const time_t defaultTsInSecs, const string& jobName, const string& instance) {
    auto eGroup = PipelineEventGroup(make_shared<SourceBuffer>());
    istringstream contentStream(content);
    string line;
    while (getline(contentStream, line)) {
        boost::algorithm::trim(line);

        // skip empty lines and comments (# HELP / # TYPE / free-form)
        if (line.empty() || line[0] == '#') {
            continue;
        }

        // parse line
        // for given sample R"""(test_metric{k1="v1", k2="v2"} 9.9410452992e+10 1715829785083)"""
        // argName = "test_metric"
        // argLabels = R"""({k1="v1", k2="v2"})"""
        // argUnwrappedLabels = R"""(k1="v1", k2="v2")"""
        // argValue = "9.9410452992e+10"
        // argSuffix = " 1715829785083"
        // argTimestamp = "1715829785083"
        //
        // The captures are declared per line and the match result is checked: RE2 leaves its output
        // arguments untouched on a failed match, so reusing them would re-emit the previous sample.
        string argName, argLabels, argUnwrappedLabels, argValue, argSuffix, argTimestamp;
        if (!RE2::FullMatch(line,
                            mSampleRegex,
                            RE2::Arg(&argName),
                            RE2::Arg(&argLabels),
                            RE2::Arg(&argUnwrappedLabels),
                            RE2::Arg(&argValue),
                            RE2::Arg(&argSuffix),
                            RE2::Arg(&argTimestamp))) {
            continue;
        }

        // skip any sample whose value is not entirely a number, or is NaN
        double sampleValue = 0.0;
        try {
            size_t consumed = 0;
            sampleValue = stod(argValue, &consumed);
            if (consumed != argValue.size()) {
                continue;
            }
        } catch (const exception&) {
            continue;
        }
        if (std::isnan(sampleValue)) {
            continue;
        }

        // use `defaultTsInSecs` if the sample has no timestamp, otherwise parse it as integer
        // milliseconds; a timestamp that is present but not a valid integer drops the sample
        time_t timestamp = defaultTsInSecs;
        if (!argTimestamp.empty()) {
            try {
                // stoll rather than stol: millisecond timestamps overflow a 32-bit long
                size_t consumed = 0;
                const long long tsInMillis = stoll(argTimestamp, &consumed);
                if (consumed != argTimestamp.size()) {
                    continue;
                }
                // TODO: check if timestamp is out of window (e.g. 24h)
                timestamp = static_cast<time_t>(tsInMillis / 1000);
                // TODO: convert milli-second part into nano-second
            } catch (const exception&) {
                continue;
            }
        }

        MetricEvent* e = eGroup.AddMetricEvent();
        e->SetName(argName);
        e->SetTimestamp(timestamp);
        e->SetValue<UntypedSingleValue>(sampleValue);

        if (!argUnwrappedLabels.empty()) {
            // NOTE: labels are split on ',' and on the first '=', so a quoted label value that itself
            // contains a comma is split into several pieces.
            string kvPair;
            istringstream labelStream(argUnwrappedLabels);
            while (getline(labelStream, kvPair, ',')) {
                const size_t equalsPos = kvPair.find('=');
                if (equalsPos == string::npos) {
                    continue;
                }
                string key = kvPair.substr(0, equalsPos);
                string labelValue = kvPair.substr(equalsPos + 1);
                // tolerate whitespace around '=' before stripping the surrounding quotes
                boost::algorithm::trim(key);
                boost::algorithm::trim(labelValue);
                boost::trim_if(labelValue, boost::is_any_of("\""));
                e->SetTag(key, labelValue);
            }
        }
        if (!jobName.empty()) {
            e->SetTag(string("job"), jobName);
        }
        if (!instance.empty()) {
            e->SetTag(string("instance"), instance);
        }
    }

    return eGroup;
}

// Reports whether the parser is free of errors, i.e. no error object is stored in mErr.
bool TextParser::Ok() const {
    return !mErr;
}

// Returns the stored error pointer (mErr); it is null when no error has been recorded.
std::shared_ptr<std::exception> TextParser::Err() const {
return mErr;
}

} // namespace logtail
55 changes: 55 additions & 0 deletions core/prometheus/TextParser.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
/*
* Copyright 2024 iLogtail Authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once

#include <re2/re2.h>

#include "models/MetricEvent.h"
#include "models/PipelineEventGroup.h"

namespace logtail {

extern const std::string SAMPLE_RE;

// Parser that turns Prometheus text-format content into a PipelineEventGroup of metric events.
//
// The sample-line regex (SAMPLE_RE) is compiled once at construction. If compilation fails the
// failure is recorded rather than thrown: check Ok() / Err() before calling Parse().
class TextParser {
public:
    // sourceBuffer - buffer handle stored in mSourceBuffer.
    TextParser(const std::shared_ptr<SourceBuffer>& sourceBuffer)
        : mSourceBuffer(sourceBuffer), mSampleRegex(SAMPLE_RE) {
        if (!mSampleRegex.ok()) {
            // Construct the concrete exception type directly. Copying a std::invalid_argument into a
            // std::exception would slice it and lose both the dynamic type and the message.
            mErr = std::make_shared<std::invalid_argument>("invalid regex: " + mSampleRegex.error());
        }
    }

    // Parses `content`, using the current time as the timestamp for samples that carry none.
    PipelineEventGroup Parse(const std::string& content);

    // Parses `content`; samples without a timestamp get `defaultTsInSecs`. Non-empty `jobName` and
    // `instance` are attached to every event as the "job" and "instance" tags.
    PipelineEventGroup Parse(const std::string& content,
                             std::time_t defaultTsInSecs,
                             const std::string& jobName = "",
                             const std::string& instance = "");

    // True when no error has been recorded.
    bool Ok() const;
    // The recorded error, or null when there is none.
    std::shared_ptr<std::exception> Err() const;

private:
    std::shared_ptr<std::exception> mErr; // set when SAMPLE_RE fails to compile
    std::shared_ptr<SourceBuffer> mSourceBuffer;
    RE2 mSampleRegex; // compiled SAMPLE_RE

#ifdef APSARA_UNIT_TEST_MAIN
    friend class TextParserUnittest;
#endif
};

} // namespace logtail
1 change: 1 addition & 0 deletions core/unittest/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ add_subdirectory(reader)
add_subdirectory(sdk)
add_subdirectory(sender)
add_subdirectory(serializer)
add_subdirectory(prometheus)

if (LINUX)
if (NOT WITHOUTSPL)
Expand Down
23 changes: 23 additions & 0 deletions core/unittest/prometheus/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Copyright 2024 iLogtail Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Build configuration for the prometheus unit tests.
cmake_minimum_required(VERSION 3.22)
project(prometheus_unittest)

# Test executable for the Prometheus text parser, linked against the unittest_base target.
add_executable(textparser_unittest TextParserUnittest.cpp)
target_link_libraries(textparser_unittest unittest_base)

# The GoogleTest module provides gtest_discover_tests().
include(GoogleTest)

# Discover the tests contained in the executable so they are registered with CTest.
gtest_discover_tests(textparser_unittest)
Loading
Loading