diff --git a/core/CMakeLists.txt b/core/CMakeLists.txt index 3dd6af2c93..6ab1f0a6a1 100644 --- a/core/CMakeLists.txt +++ b/core/CMakeLists.txt @@ -51,7 +51,7 @@ else () endif () if (NOT WITHSPL) - add_definitions(-D__EXCLUDE_SPL__) + add_definitions(-D__EXCLUDE_SSE4_2__) endif() # Default C/CXX flags. diff --git a/core/pipeline/plugin/PluginRegistry.cpp b/core/pipeline/plugin/PluginRegistry.cpp index 6a0061c68a..d205aa7075 100644 --- a/core/pipeline/plugin/PluginRegistry.cpp +++ b/core/pipeline/plugin/PluginRegistry.cpp @@ -61,7 +61,7 @@ #include "plugin/processor/inner/ProcessorSplitLogStringNative.h" #include "plugin/processor/inner/ProcessorSplitMultilineLogStringNative.h" #include "plugin/processor/inner/ProcessorTagNative.h" -#if defined(__linux__) && !defined(__ANDROID__) && !defined(__EXCLUDE_SPL__) +#if defined(__linux__) && !defined(__ANDROID__) && !defined(__EXCLUDE_SSE4_2__) #include "plugin/processor/ProcessorSPL.h" #endif @@ -154,7 +154,7 @@ void PluginRegistry::LoadStaticPlugins() { RegisterProcessorCreator(new StaticProcessorCreator()); RegisterProcessorCreator(new StaticProcessorCreator()); RegisterProcessorCreator(new StaticProcessorCreator()); -#if defined(__linux__) && !defined(__ANDROID__) && !defined(__EXCLUDE_SPL__) +#if defined(__linux__) && !defined(__ANDROID__) && !defined(__EXCLUDE_SSE4_2__) if (BOOL_FLAG(enable_processor_spl)) { RegisterProcessorCreator(new StaticProcessorCreator()); } diff --git a/core/plugin/processor/inner/ProcessorPromParseMetricNative.cpp b/core/plugin/processor/inner/ProcessorPromParseMetricNative.cpp index 95b81a569a..5c28375b82 100644 --- a/core/plugin/processor/inner/ProcessorPromParseMetricNative.cpp +++ b/core/plugin/processor/inner/ProcessorPromParseMetricNative.cpp @@ -32,7 +32,7 @@ void ProcessorPromParseMetricNative::Process(PipelineEventGroup& eGroup) { auto timestampMilliSec = StringTo(scrapeTimestampMilliSecStr.to_string()); auto timestamp = timestampMilliSec / 1000; auto nanoSec = timestampMilliSec % 1000 * 1000000; - TextParser parser(mScrapeConfigPtr->mHonorTimestamps); + prom::TextParser parser(mScrapeConfigPtr->mHonorTimestamps); parser.SetDefaultTimestamp(timestamp, nanoSec); for (auto& e : events) { @@ -49,7 +49,7 @@ bool ProcessorPromParseMetricNative::IsSupportedEvent(const PipelineEventPtr& e) bool ProcessorPromParseMetricNative::ProcessEvent(PipelineEventPtr& e, EventsContainer& newEvents, PipelineEventGroup& eGroup, - TextParser& parser) { + prom::TextParser& parser) { if (!IsSupportedEvent(e)) { return false; } diff --git a/core/plugin/processor/inner/ProcessorPromParseMetricNative.h b/core/plugin/processor/inner/ProcessorPromParseMetricNative.h index f9c036c58a..3300cf25f4 100644 --- a/core/plugin/processor/inner/ProcessorPromParseMetricNative.h +++ b/core/plugin/processor/inner/ProcessorPromParseMetricNative.h @@ -21,7 +21,7 @@ class ProcessorPromParseMetricNative : public Processor { bool IsSupportedEvent(const PipelineEventPtr&) const override; private: - bool ProcessEvent(PipelineEventPtr&, EventsContainer&, PipelineEventGroup&, TextParser& parser); + bool ProcessEvent(PipelineEventPtr&, EventsContainer&, PipelineEventGroup&, prom::TextParser& parser); std::unique_ptr mScrapeConfigPtr; #ifdef APSARA_UNIT_TEST_MAIN diff --git a/core/prometheus/labels/TextParser.cpp b/core/prometheus/labels/TextParser.cpp index a3ecb394ab..07f8ace523 100644 --- a/core/prometheus/labels/TextParser.cpp +++ b/core/prometheus/labels/TextParser.cpp @@ -16,29 +16,21 @@ #include "prometheus/labels/TextParser.h" +#include + #include -#include #include #include -#include "common/StringTools.h" #include "logger/Logger.h" #include "models/MetricEvent.h" #include "models/PipelineEventGroup.h" #include "models/StringView.h" -#include "prometheus/Constants.h" #include "prometheus/Utils.h" using namespace std; -namespace logtail { - -bool IsValidNumberChar(char c) { - static const unordered_set sValidChars - = {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '.', '-', '+', 'e', 'E', 'I', - 'N', 'F', 'T', 'Y', 'i', 'n', 'f', 't', 'y', 'X', 'x', 'N', 'n', 'A', 'a'}; - return sValidChars.count(c); -}; +namespace logtail::prom { TextParser::TextParser(bool honorTimestamps) : mHonorTimestamps(honorTimestamps) { } @@ -69,13 +61,11 @@ PipelineEventGroup TextParser::Parse(const string& content, uint64_t defaultTime } bool TextParser::ParseLine(StringView line, MetricEvent& metricEvent) { - mLine = line; - mPos = 0; mState = TextState::Start; mLabelName.clear(); - mTokenLength = 0; + mEscape = FindFirstLetter(line.data(), line.size(), '\\').has_value(); - HandleStart(metricEvent); + HandleStart(metricEvent, line.data(), line.size()); if (mState == TextState::Done) { return true; @@ -84,228 +74,316 @@ bool TextParser::ParseLine(StringView line, MetricEvent& metricEvent) { return false; } -// start to parse metric sample:test_metric{k1="v1", k2="v2" } 9.9410452992e+10 1715829785083 # exemplarsxxx -void TextParser::HandleStart(MetricEvent& metricEvent) { - SkipLeadingWhitespace(); - auto c = (mPos < mLine.size()) ? mLine[mPos] : '\0'; - if (std::isalpha(c) || c == '_' || c == ':') { - HandleMetricName(metricEvent); - } else { - HandleError("expected metric name"); +std::optional TextParser::FindFirstLetter(const char* s, size_t len, char target) { + size_t res = 0; +#if !defined(__EXCLUDE_SSE4_2__) + __m128i targetVec = _mm_set1_epi8(target); + + while (res + 16 < len) { + __m128i chunk = _mm_loadu_si128(reinterpret_cast(&s[res])); + + __m128i cmp = _mm_cmpeq_epi8(chunk, targetVec); + + int mask = _mm_movemask_epi8(cmp); + + if (mask != 0) { + return res + __builtin_ffs(mask) - 1; + } + + res += 16; + } +#endif + + while (res < len) { + if (s[res] == target) { + return res; + } + res++; } + return std::nullopt; } -// parse:test_metric{k1="v1", k2="v2" } 9.9410452992e+10 1715829785083 # exemplarsxxx -void TextParser::HandleMetricName(MetricEvent& metricEvent) { - char c = (mPos < mLine.size()) ? mLine[mPos] : '\0'; - while (std::isalpha(c) || c == '_' || c == ':' || std::isdigit(c)) { - ++mTokenLength; - ++mPos; - c = (mPos < mLine.size()) ? mLine[mPos] : '\0'; +std::optional TextParser::FindFirstWhiteSpace(const char* s, size_t len) { + size_t res = 0; + +#if !defined(__EXCLUDE_SSE4_2__) + static __m128i sTargetVec1 = _mm_set1_epi8(' '); + static __m128i sTargetVec2 = _mm_set1_epi8('\t'); + + while (res + 16 < len) { + __m128i chunk = _mm_loadu_si128(reinterpret_cast(&s[res])); + + __m128i cmp1 = _mm_cmpeq_epi8(chunk, sTargetVec1); + __m128i cmp2 = _mm_cmpeq_epi8(chunk, sTargetVec2); + + int mask1 = _mm_movemask_epi8(cmp1); + int mask2 = _mm_movemask_epi8(cmp2); + + if (mask1 != 0) { + return res + __builtin_ffs(mask1) - 1; + } + if (mask2 != 0) { + return res + __builtin_ffs(mask2) - 1; + } + + res += 16; } - metricEvent.SetNameNoCopy(mLine.substr(mPos - mTokenLength, mTokenLength)); - mTokenLength = 0; - SkipLeadingWhitespace(); - if (mPos < mLine.size()) { - if (mLine[mPos] == '{') { - ++mPos; - SkipLeadingWhitespace(); - HandleLabelName(metricEvent); - } else { - HandleSampleValue(metricEvent); +#endif + + while (res < len) { + if (s[res] == ' ' || s[res] == '\t') { + return res; } - } else { - HandleError("error end of metric name"); + res++; } + return std::nullopt; } -// parse:k1="v1", k2="v2" } 9.9410452992e+10 1715829785083 # exemplarsxxx -void TextParser::HandleLabelName(MetricEvent& metricEvent) { - char c = (mPos < mLine.size()) ? mLine[mPos] : '\0'; - if (std::isalpha(c) || c == '_') { - while (std::isalpha(c) || c == '_' || std::isdigit(c)) { - ++mTokenLength; - ++mPos; - c = (mPos < mLine.size()) ? mLine[mPos] : '\0'; +std::optional TextParser::FindWhiteSpaceAndExemplar(const char* s, size_t len) { + size_t res = 0; + +#if !defined(__EXCLUDE_SSE4_2__) + static __m128i sTargetVec1 = _mm_set1_epi8(' '); + static __m128i sTargetVec2 = _mm_set1_epi8('\t'); + static __m128i sTargetVec3 = _mm_set1_epi8('#'); + + while (res + 16 < len) { + __m128i chunk = _mm_loadu_si128(reinterpret_cast(&s[res])); + + __m128i cmp1 = _mm_cmpeq_epi8(chunk, sTargetVec1); + __m128i cmp2 = _mm_cmpeq_epi8(chunk, sTargetVec2); + __m128i cmp3 = _mm_cmpeq_epi8(chunk, sTargetVec3); + + int mask1 = _mm_movemask_epi8(cmp1); + int mask2 = _mm_movemask_epi8(cmp2); + int mask3 = _mm_movemask_epi8(cmp3); + + if (mask1 != 0) { + return res + __builtin_ffs(mask1) - 1; + } + if (mask2 != 0) { + return res + __builtin_ffs(mask2) - 1; + } + if (mask3 != 0) { + return res + __builtin_ffs(mask3) - 1; + } + + res += 16; + } +#endif + + while (res < len) { + if (s[res] == ' ' || s[res] == '\t' || s[res] == '#') { + return res; } - mLabelName = mLine.substr(mPos - mTokenLength, mTokenLength); - mTokenLength = 0; - SkipLeadingWhitespace(); - if (mPos == mLine.size() || mLine[mPos] != '=') { - HandleError("expected '=' after label name"); + res++; + } + return std::nullopt; +} + +std::optional TextParser::SkipTrailingWhitespace(const char* s, size_t pos) { + for (; pos > 0 && (s[pos] == ' ' || s[pos] == '\t'); --pos) { + } + if (pos == 0 && (s[pos] == ' ' || s[pos] == '\t')) { + return std::nullopt; + } + return pos; +} + +inline size_t TextParser::SkipLeadingWhitespace(const char* s, size_t len, size_t pos) { + while (pos < len && (s[pos] == ' ' || s[pos] == '\t')) { + pos++; + } + return pos; +} + +void TextParser::HandleError(const string& errMsg) { + LOG_WARNING(sLogger, ("text parser error parsing line", errMsg)); + mState = TextState::Error; +} + +void TextParser::HandleStart(MetricEvent& metricEvent, const char* s, const size_t len) { + auto pos = SkipLeadingWhitespace(s, len, 0); + HandleMetricName(metricEvent, s + pos, len - pos); +} + +void TextParser::HandleMetricName(MetricEvent& metricEvent, const char* s, size_t len) { + auto pos = FindFirstLetter(s, len, '{'); + if (pos.has_value()) { + auto endPos = SkipTrailingWhitespace(s, pos.value() - 1); + if (endPos.has_value()) { + metricEvent.SetNameNoCopy(StringView(s, endPos.value() + 1)); + } else { + HandleError("error end of metric name"); return; } - ++mPos; - SkipLeadingWhitespace(); - HandleEqualSign(metricEvent); - } else if (c == '}') { - ++mPos; - SkipLeadingWhitespace(); - HandleSampleValue(metricEvent); + auto nextPos = SkipLeadingWhitespace(s, len, pos.value() + 1); + HandleLabelName(metricEvent, s + nextPos, len - nextPos); } else { - HandleError("invalid character in label name"); + auto nextPos = FindFirstWhiteSpace(s, len); + if (nextPos.has_value()) { + metricEvent.SetNameNoCopy(StringView(s, nextPos.value())); + auto nextNextPos = SkipLeadingWhitespace(s, len, nextPos.value()); + HandleSampleValue(metricEvent, s + nextNextPos, len - nextNextPos); + } } } -// parse:"v1", k2="v2" } 9.9410452992e+10 1715829785083 # exemplarsxxx -void TextParser::HandleEqualSign(MetricEvent& metricEvent) { - if (mPos < mLine.size() && mLine[mPos] == '"') { - ++mPos; - HandleLabelValue(metricEvent); +void TextParser::HandleLabelName(MetricEvent& metricEvent, const char* s, size_t len) { + auto pos = FindFirstLetter(s, len, '='); + if (pos.has_value()) { + auto endPos = SkipTrailingWhitespace(s, pos.value() - 1); + if (endPos.has_value()) { + if (FindFirstLetter(s, endPos.value(), '"').has_value()) { + HandleError("invalid character in label name"); + return; + } + mLabelName = StringView(s, endPos.value() + 1); + } else { + HandleError("error end of metric name"); + return; + } + auto nextPos = SkipLeadingWhitespace(s, len, pos.value() + 1); + HandleLabelValue(metricEvent, s + nextPos, len - nextPos); } else { - HandleError("expected '\"' after '='"); + if (len > 0 && s[0] == '}') { + auto nextPos = SkipLeadingWhitespace(s, len, 1); + HandleSampleValue(metricEvent, s + nextPos, len - nextPos); + } else { + HandleError("invalid character in label name"); + } } } -// parse:v1", k2="v2" } 9.9410452992e+10 1715829785083 # exemplarsxxx -void TextParser::HandleLabelValue(MetricEvent& metricEvent) { +void TextParser::HandleLabelValue(MetricEvent& metricEvent, const char* s, size_t len) { // left quote has been consumed // LableValue supports escape char - bool escaped = false; - auto lPos = mPos; - while (mPos < mLine.size() && mLine[mPos] != '"') { - if (mLine[mPos] != '\\') { - if (escaped) { - mEscapedLabelValue.push_back(mLine[mPos]); - } - ++mPos; - ++mTokenLength; - } else { - if (escaped == false) { - // first meet escape char - escaped = true; - mEscapedLabelValue = mLine.substr(lPos, mPos - lPos).to_string(); - } - if (mPos + 1 < mLine.size()) { - // check next char, if it is valid escape char, we can consume two chars and push one escaped char - // if not, we neet to push the two chars - // valid escape char: \", \\, \n - switch (mLine[lPos + 1]) { - case '\\': - case '\"': - mEscapedLabelValue.push_back(mLine[mPos + 1]); - break; - case 'n': - mEscapedLabelValue.push_back('\n'); - break; - default: - mEscapedLabelValue.push_back('\\'); - mEscapedLabelValue.push_back(mLine[mPos + 1]); - break; + if (len == 0 || s[0] != '"') { + HandleError("invalid character in label value"); + return; + } + s = s + 1; + len--; + size_t nextPos = 0; + if (mEscape) { + // slow path + // escape char + string labelValue; + size_t pos = 0; + for (size_t i = 0; i < len; i++) { + if (s[i] == '\\') { + if (i + 1 < len) { + switch (s[i + 1]) { + case 'n': + labelValue.push_back('\n'); + break; + case '\\': + case '\"': + labelValue.push_back(s[i + 1]); + break; + default: + labelValue.push_back('\\'); + labelValue.push_back(s[i + 1]); + break; + } + i++; + } else { + HandleError("invalid escape char"); + return; } - mPos += 2; + } else if (s[i] == '"') { + pos = i; + break; } else { - mEscapedLabelValue.push_back(mLine[mPos + 1]); - ++mPos; + labelValue.push_back(s[i]); } } - } - - if (mPos == mLine.size()) { - HandleError("unexpected end of input in label value"); - return; - } - - if (!escaped) { - metricEvent.SetTagNoCopy(mLabelName, mLine.substr(mPos - mTokenLength, mTokenLength)); + auto sb = metricEvent.GetSourceBuffer()->CopyString(labelValue); + metricEvent.SetTag(mLabelName, StringView(sb.data, sb.size)); + nextPos = SkipLeadingWhitespace(s, len, pos + 1); } else { - metricEvent.SetTag(mLabelName.to_string(), mEscapedLabelValue); - mEscapedLabelValue.clear(); + const auto pos = FindFirstLetter(s, len, '"'); + if (pos.has_value()) { + metricEvent.SetTagNoCopy(mLabelName, StringView(s, pos.value())); + nextPos = SkipLeadingWhitespace(s, len, pos.value() + 1); + } else { + HandleError("invalid character in label value"); + return; + } } - mTokenLength = 0; - ++mPos; - SkipLeadingWhitespace(); - if (mPos < mLine.size() && (mLine[mPos] == ',' || mLine[mPos] == '}')) { - HandleCommaOrCloseBrace(metricEvent); + if (s[nextPos] == ',') { + nextPos++; + nextPos = SkipLeadingWhitespace(s, len, nextPos); + if (s[nextPos] == '}') { + nextPos++; + nextPos = SkipLeadingWhitespace(s, len, nextPos); + HandleSampleValue(metricEvent, s + nextPos, len - nextPos); + return; + } + HandleLabelName(metricEvent, s + nextPos, len - nextPos); + } else if (s[nextPos] == '}') { + nextPos++; + nextPos = SkipLeadingWhitespace(s, len, nextPos); + HandleSampleValue(metricEvent, s + nextPos, len - nextPos); } else { - HandleError("unexpected end of input in label value"); + HandleError("invalid character in label value"); } } -// parse:, k2="v2" } 9.9410452992e+10 1715829785083 # exemplarsxxx -// or parse:} 9.9410452992e+10 1715829785083 # exemplarsxxx -void TextParser::HandleCommaOrCloseBrace(MetricEvent& metricEvent) { - char c = (mPos < mLine.size()) ? mLine[mPos] : '\0'; - if (c == ',') { - ++mPos; - SkipLeadingWhitespace(); - HandleLabelName(metricEvent); - } else if (c == '}') { - ++mPos; - SkipLeadingWhitespace(); - HandleSampleValue(metricEvent); +void TextParser::HandleSampleValue(MetricEvent& metricEvent, const char* s, size_t len) { + auto pos = FindWhiteSpaceAndExemplar(s, len); + size_t valueLen = 0; + if (pos.has_value()) { + valueLen = pos.value(); } else { - HandleError("expected ',' or '}' after label value"); + valueLen = len; } -} - -// parse:9.9410452992e+10 1715829785083 # exemplarsxxx -void TextParser::HandleSampleValue(MetricEvent& metricEvent) { - while (mPos < mLine.size() && IsValidNumberChar(mLine[mPos])) { - ++mPos; - ++mTokenLength; - } - - if (mPos < mLine.size() && mLine[mPos] != ' ' && mLine[mPos] != '\t' && mLine[mPos] != '#') { - HandleError("unexpected end of input in sample value"); + if (valueLen == 0) { + HandleError("invalid sample value"); return; } - - auto tmpSampleValue = mLine.substr(mPos - mTokenLength, mTokenLength); - mDoubleStr = tmpSampleValue.to_string(); - + auto tmpSampleValue = StringView(s, valueLen); try { - mSampleValue = std::stod(mDoubleStr); + auto sampleValue = stod(tmpSampleValue.to_string()); + metricEvent.SetValue(sampleValue); } catch (...) { HandleError("invalid sample value"); - mTokenLength = 0; return; } - mDoubleStr.clear(); - - metricEvent.SetValue(mSampleValue); - mTokenLength = 0; - SkipLeadingWhitespace(); - if (mPos == mLine.size() || mLine[mPos] == '#' || !mHonorTimestamps) { + if ((pos.has_value() && s[pos.value()] == '#') || valueLen == len) { metricEvent.SetTimestamp(mDefaultTimestamp, mDefaultNanoTimestamp); mState = TextState::Done; - } else { - HandleTimestamp(metricEvent); + return; } + s = s + pos.value() + 1; + len -= pos.value() + 1; + auto nextPos = SkipLeadingWhitespace(s, len, 0); + HandleTimestamp(metricEvent, s + nextPos, len - nextPos); } - -// parse:1715829785083 # exemplarsxxx -// timestamp will be 1715829785.083 in OpenMetrics -void TextParser::HandleTimestamp(MetricEvent& metricEvent) { +void TextParser::HandleTimestamp(MetricEvent& metricEvent, const char* s, size_t len) { // '#' is for exemplars, and we don't need it - while (mPos < mLine.size() && IsValidNumberChar(mLine[mPos])) { - ++mPos; - ++mTokenLength; - } - if (mPos < mLine.size() && mLine[mPos] != ' ' && mLine[mPos] != '\t' && mLine[mPos] != '#') { - HandleError("unexpected end of input in sample timestamp"); - return; + auto pos = FindWhiteSpaceAndExemplar(s, len); + size_t valueLen = 0; + if (pos.has_value()) { + valueLen = pos.value(); + } else { + valueLen = len; } - - auto tmpTimestamp = mLine.substr(mPos - mTokenLength, mTokenLength); - if (tmpTimestamp.size() == 0) { + if (valueLen == 0) { mState = TextState::Done; return; } - mDoubleStr = tmpTimestamp.to_string(); + auto tmpTimestamp = StringView(s, valueLen); double milliTimestamp = 0; try { - milliTimestamp = stod(mDoubleStr); + milliTimestamp = stod(tmpTimestamp.to_string()); } catch (...) { HandleError("invalid timestamp"); - mTokenLength = 0; return; } - mDoubleStr.clear(); if (milliTimestamp > 1ULL << 63) { HandleError("timestamp overflow"); - mTokenLength = 0; return; } if (milliTimestamp < 1UL << 31) { @@ -318,21 +396,7 @@ void TextParser::HandleTimestamp(MetricEvent& metricEvent) { } else { metricEvent.SetTimestamp(mDefaultTimestamp, mDefaultNanoTimestamp); } - - mTokenLength = 0; - mState = TextState::Done; } -void TextParser::HandleError(const string& errMsg) { - LOG_WARNING(sLogger, ("text parser error parsing line", mLine.to_string() + errMsg)); - mState = TextState::Error; -} - -inline void TextParser::SkipLeadingWhitespace() { - while (mPos < mLine.length() && (mLine[mPos] == ' ' || mLine[mPos] == '\t')) { - mPos++; - } -} - -} // namespace logtail +} // namespace logtail::prom diff --git a/core/prometheus/labels/TextParser.h b/core/prometheus/labels/TextParser.h index 0eb8899667..afa1d12933 100644 --- a/core/prometheus/labels/TextParser.h +++ b/core/prometheus/labels/TextParser.h @@ -20,11 +20,13 @@ #include "models/MetricEvent.h" #include "models/PipelineEventGroup.h" +#include "models/StringView.h" -namespace logtail { +namespace logtail::prom { enum class TextState { Start, Done, Error }; +// no strict grammar for prom class TextParser { public: TextParser() = default; @@ -37,30 +39,26 @@ class TextParser { bool ParseLine(StringView line, MetricEvent& metricEvent); private: - void HandleError(const std::string& errMsg); + std::optional FindFirstLetter(const char* s, size_t len, char target); + std::optional FindFirstWhiteSpace(const char* s, size_t len); + std::optional FindWhiteSpaceAndExemplar(const char* s, size_t len); + + std::optional SkipTrailingWhitespace(const char* s, size_t pos); + inline size_t SkipLeadingWhitespace(const char* s, size_t len, size_t pos); - void HandleStart(MetricEvent& metricEvent); - void HandleMetricName(MetricEvent& metricEvent); - void HandleOpenBrace(MetricEvent& metricEvent); - void HandleLabelName(MetricEvent& metricEvent); - void HandleEqualSign(MetricEvent& metricEvent); - void HandleLabelValue(MetricEvent& metricEvent); - void HandleCommaOrCloseBrace(MetricEvent& metricEvent); - void HandleSampleValue(MetricEvent& metricEvent); - void HandleTimestamp(MetricEvent& metricEvent); - void HandleSpace(MetricEvent& metricEvent); + void HandleError(const std::string& errMsg); - inline void SkipLeadingWhitespace(); + void HandleStart(MetricEvent& metricEvent, const char* s, size_t len); + void HandleMetricName(MetricEvent& metricEvent, const char* s, size_t len); + void HandleLabelName(MetricEvent& metricEvent, const char* s, size_t len); + void HandleLabelValue(MetricEvent& metricEvent, const char* s, size_t len); + void HandleSampleValue(MetricEvent& metricEvent, const char* s, size_t len); + void HandleTimestamp(MetricEvent& metricEvent, const char* s, size_t len); TextState mState{TextState::Start}; - StringView mLine; - std::size_t mPos{0}; + bool mEscape{false}; StringView mLabelName; - std::string mEscapedLabelValue; - double mSampleValue{0.0}; - std::size_t mTokenLength{0}; - std::string mDoubleStr; bool mHonorTimestamps{true}; time_t mDefaultTimestamp{0}; @@ -71,4 +69,4 @@ class TextParser { #endif }; -} // namespace logtail +} // namespace logtail::prom diff --git a/core/unittest/CMakeLists.txt b/core/unittest/CMakeLists.txt index 41f1601069..bed74003ba 100644 --- a/core/unittest/CMakeLists.txt +++ b/core/unittest/CMakeLists.txt @@ -91,7 +91,7 @@ if (UNIX) endif() # add core subdir set(UT_BASE_TARGET "unittest_base") - add_definitions(-D__EXCLUDE_SPL__) + add_definitions(-D__EXCLUDE_SSE4_2__) add_library(${UT_BASE_TARGET} SHARED ${SOURCE_FILES_CORE}) target_compile_options(${UT_BASE_TARGET} PRIVATE -Werror) add_core_subdir() diff --git a/core/unittest/processor/ProcessorPromParseMetricNativeUnittest.cpp b/core/unittest/processor/ProcessorPromParseMetricNativeUnittest.cpp index c0481a1b94..4b8155e523 100644 --- a/core/unittest/processor/ProcessorPromParseMetricNativeUnittest.cpp +++ b/core/unittest/processor/ProcessorPromParseMetricNativeUnittest.cpp @@ -14,14 +14,11 @@ * limitations under the License. */ -#include "LogEvent.h" #include "MetricEvent.h" #include "StringTools.h" #include "common/JsonUtil.h" #include "models/PipelineEventGroup.h" #include "plugin/processor/inner/ProcessorPromParseMetricNative.h" -#include "prometheus/Constants.h" -#include "prometheus/labels/TextParser.h" #include "prometheus/schedulers/ScrapeScheduler.h" #include "unittest/Unittest.h" @@ -76,7 +73,6 @@ void ProcessorParsePrometheusMetricUnittest::TestProcess() { APSARA_TEST_TRUE(processor.Init(config)); // make events - auto parser = TextParser(); auto splitByLines = [](const std::string& content) { PipelineEventGroup eGroup(std::make_shared()); diff --git a/core/unittest/processor/ProcessorPromRelabelMetricNativeUnittest.cpp b/core/unittest/processor/ProcessorPromRelabelMetricNativeUnittest.cpp index 39f4cde959..f5790cb92a 100644 --- a/core/unittest/processor/ProcessorPromRelabelMetricNativeUnittest.cpp +++ b/core/unittest/processor/ProcessorPromRelabelMetricNativeUnittest.cpp @@ -24,7 +24,7 @@ using namespace std; -namespace logtail { +namespace logtail::prom { class ProcessorPromRelabelMetricNativeUnittest : public testing::Test { public: void SetUp() override { mContext.SetConfigName("project##config_0"); } @@ -281,6 +281,6 @@ UNIT_TEST_CASE(ProcessorPromRelabelMetricNativeUnittest, TestAddAutoMetrics) UNIT_TEST_CASE(ProcessorPromRelabelMetricNativeUnittest, TestHonorLabels) -} // namespace logtail +} // namespace logtail::prom UNIT_TEST_MAIN \ No newline at end of file diff --git a/core/unittest/prometheus/TextParserBenchmark.cpp b/core/unittest/prometheus/TextParserBenchmark.cpp index 7af03beb72..85028a46ab 100644 --- a/core/unittest/prometheus/TextParserBenchmark.cpp +++ b/core/unittest/prometheus/TextParserBenchmark.cpp @@ -21,7 +21,7 @@ using namespace std; -namespace logtail { +namespace logtail::prom { class TextParserBenchmark : public testing::Test { public: @@ -37,28 +37,20 @@ class TextParserBenchmark : public testing::Test { m100MData += mRawData; repeatCnt -= 1; } - - m1000MData.reserve(1000 * 1024 * 1024); - repeatCnt = 1000 * 1024 * 1024 / mRawData.size(); - while (repeatCnt > 0) { - m1000MData += mRawData; - repeatCnt -= 1; - } } private: std::string mRawData = R"""( -test_metric1{k1="v1", k2="v2"} 2.0 1234567890 -test_metric2{k1="v1",k2="v2"} 9.9410452992e+10 -test_metric3{k1="v1",k2="v2"} 9.9410452992e+10 1715829785083 -test_metric4{k1="v1", k2="v2" } 9.9410452992e+10 1715829785083 -test_metric5{k1="v1",k2="v2",} 9.9410452992e+10 1715829785083 -test_metric6{k1="v1",k2="v2", } 9.9410452992e+10 1715829785083 -test_metric7{k1="v1", k2="v2", } 9.9410452992e+10 1715829785083 -test_metric8{k1="v1", k2="v2", } 9.9410452992e+10 1715829785083 +test_metric1{k111111111111="v11111111111", k222222="v2"} 2.0 1234567890 +test_metric2{k111111111111="v11111111111",k222222="v2"} 9.9410452992e+10 +test_metric3{k111111111111="v11111111111",k222222="v2"} 9.9410452992e+10 1715829785083 +test_metric4{k111111111111="v11111111111", k222222="v2" } 9.9410452992e+10 1715829785083 +test_metric5{k111111111111="v11111111111",k222222="v2",} 9.9410452992e+10 1715829785083 +test_metric6{k111111111111="v11111111111",k222222="v2", } 9.9410452992e+10 1715829785083 +test_metric7{k111111111111="v11111111111", k222222="v2", } 9.9410452992e+10 1715829785083 +test_metric8{k111111111111="v11111111111", k222222="v2", } 9.9410452992e+10 1715829785083 )"""; std::string m100MData; - std::string m1000MData; }; void TextParserBenchmark::TestParse100M() const { @@ -70,26 +62,11 @@ void TextParserBenchmark::TestParse100M() const { auto end = std::chrono::high_resolution_clock::now(); std::chrono::duration elapsed = end - start; cout << "elapsed: " << elapsed.count() << " seconds" << endl; - // elapsed: 1.53s in release mode - // elapsed: 551MB in release mode -} - -void TextParserBenchmark::TestParse1000M() const { - auto start = std::chrono::high_resolution_clock::now(); - - TextParser parser; - auto res = parser.Parse(m1000MData, 0, 0); - - auto end = std::chrono::high_resolution_clock::now(); - std::chrono::duration elapsed = end - start; - cout << "elapsed: " << elapsed.count() << " seconds" << endl; - // elapsed: 15.4s in release mode - // elapsed: 4960MB in release mode + // 2.51s -> 1.26s if we use SIMD } UNIT_TEST_CASE(TextParserBenchmark, TestParse100M) -UNIT_TEST_CASE(TextParserBenchmark, TestParse1000M) -} // namespace logtail +} // namespace logtail::prom UNIT_TEST_MAIN diff --git a/core/unittest/prometheus/TextParserUnittest.cpp b/core/unittest/prometheus/TextParserUnittest.cpp index 7af513ba2e..0474f7a981 100644 --- a/core/unittest/prometheus/TextParserUnittest.cpp +++ b/core/unittest/prometheus/TextParserUnittest.cpp @@ -23,7 +23,7 @@ using namespace std; -namespace logtail { +namespace logtail::prom { bool IsDoubleEqual(double a, double b) { return fabs(a - b) < 0.000001; @@ -388,6 +388,6 @@ void TextParserUnittest::TestParseUnicodeLabelValue() { UNIT_TEST_CASE(TextParserUnittest, TestParseUnicodeLabelValue) -} // namespace logtail +} // namespace logtail::prom UNIT_TEST_MAIN