From 314785e56ba27ade75a07f2b007d476ef9ef8cc9 Mon Sep 17 00:00:00 2001 From: henryzhx8 Date: Tue, 12 Nov 2024 13:33:27 +0800 Subject: [PATCH 01/10] fix flusher_sls ut (#1874) --- core/plugin/flusher/sls/FlusherSLS.cpp | 2 +- core/unittest/flusher/FlusherSLSUnittest.cpp | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/core/plugin/flusher/sls/FlusherSLS.cpp b/core/plugin/flusher/sls/FlusherSLS.cpp index 97ab507be8..524b4550b9 100644 --- a/core/plugin/flusher/sls/FlusherSLS.cpp +++ b/core/plugin/flusher/sls/FlusherSLS.cpp @@ -477,7 +477,7 @@ bool FlusherSLS::Init(const Json::Value& config, Json::Value& optionalGoPipeline } DefaultFlushStrategyOptions strategy{ - static_cast(INT32_FLAG(max_send_log_group_size) / INT32_FLAG(sls_serialize_size_expansion_ratio)), + static_cast(INT32_FLAG(max_send_log_group_size) / DOUBLE_FLAG(sls_serialize_size_expansion_ratio)), static_cast(INT32_FLAG(batch_send_metric_size)), static_cast(INT32_FLAG(merge_log_count_limit)), static_cast(INT32_FLAG(batch_send_interval))}; diff --git a/core/unittest/flusher/FlusherSLSUnittest.cpp b/core/unittest/flusher/FlusherSLSUnittest.cpp index 1e1251fd51..73dc1e97e3 100644 --- a/core/unittest/flusher/FlusherSLSUnittest.cpp +++ b/core/unittest/flusher/FlusherSLSUnittest.cpp @@ -39,6 +39,7 @@ DECLARE_FLAG_INT32(batch_send_interval); DECLARE_FLAG_INT32(merge_log_count_limit); DECLARE_FLAG_INT32(batch_send_metric_size); DECLARE_FLAG_INT32(max_send_log_group_size); +DECLARE_FLAG_DOUBLE(sls_serialize_size_expansion_ratio); using namespace std; @@ -108,7 +109,7 @@ void FlusherSLSUnittest::OnSuccessfulInit() { APSARA_TEST_TRUE(flusher->mShardHashKeys.empty()); APSARA_TEST_EQUAL(static_cast(INT32_FLAG(merge_log_count_limit)), flusher->mBatcher.GetEventFlushStrategy().GetMinCnt()); - APSARA_TEST_EQUAL(static_cast(INT32_FLAG(max_send_log_group_size)), + APSARA_TEST_EQUAL(static_cast(INT32_FLAG(max_send_log_group_size) / DOUBLE_FLAG(sls_serialize_size_expansion_ratio)), 
flusher->mBatcher.GetEventFlushStrategy().GetMaxSizeBytes()); APSARA_TEST_EQUAL(static_cast(INT32_FLAG(batch_send_metric_size)), flusher->mBatcher.GetEventFlushStrategy().GetMinSizeBytes()); From 4741ec819d7267fa39e3a71ceb31ca9943c5b0bc Mon Sep 17 00:00:00 2001 From: Bingchang Chen Date: Tue, 12 Nov 2024 13:50:57 +0800 Subject: [PATCH 02/10] test: refactor E2E trigger (#1861) * test: refactor E2E trigger * fix * fix * fix * fix * fix * fix * fix * fix --- .github/workflows/benchmark.yaml | 7 + core/unittest/pipeline/PipelineUnittest.cpp | 1 + docker/Dockerfile.e2e-test | 5 +- .../case.feature | 2 +- .../case.feature | 2 +- .../case.feature | 2 +- .../case.feature | 2 +- .../case.feature | 2 +- test/config/config.go | 5 + test/engine/setup/docker_compose.go | 8 +- test/engine/steps.go | 36 ++-- .../trigger/{protocol.go => ebpf/http.go} | 4 +- test/engine/trigger/ebpf/remote_mmap.py | 22 ++ .../{ebpf_trigger.go => ebpf/security.go} | 27 +-- test/engine/trigger/file.go | 114 ---------- test/engine/trigger/generator/apsara_test.go | 73 ------- .../trigger/generator/delimiter_test.go | 127 ----------- .../trigger/generator/ebpf_file_mmap_test.go | 51 ----- test/engine/trigger/generator/helper.go | 85 -------- test/engine/trigger/generator/json_test.go | 119 ----------- test/engine/trigger/generator/regex_test.go | 198 ------------------ test/engine/trigger/helper.go | 21 +- test/engine/trigger/log/file.go | 105 ++++++++++ .../trigger/{http.go => log/http_server.go} | 2 +- test/engine/trigger/log/remote_file.py | 172 +++++++++++++++ .../trigger/log/remote_file_benchmark.py | 70 +++++++ test/engine/trigger/trigger.go | 96 --------- test/requirements.txt | 1 + 28 files changed, 449 insertions(+), 910 deletions(-) rename test/engine/trigger/{protocol.go => ebpf/http.go} (90%) create mode 100644 test/engine/trigger/ebpf/remote_mmap.py rename test/engine/trigger/{ebpf_trigger.go => ebpf/security.go} (76%) delete mode 100644 test/engine/trigger/file.go delete mode 100644 
test/engine/trigger/generator/apsara_test.go delete mode 100644 test/engine/trigger/generator/delimiter_test.go delete mode 100644 test/engine/trigger/generator/ebpf_file_mmap_test.go delete mode 100644 test/engine/trigger/generator/helper.go delete mode 100644 test/engine/trigger/generator/json_test.go delete mode 100644 test/engine/trigger/generator/regex_test.go create mode 100644 test/engine/trigger/log/file.go rename test/engine/trigger/{http.go => log/http_server.go} (99%) create mode 100644 test/engine/trigger/log/remote_file.py create mode 100644 test/engine/trigger/log/remote_file_benchmark.py delete mode 100644 test/engine/trigger/trigger.go create mode 100644 test/requirements.txt diff --git a/.github/workflows/benchmark.yaml b/.github/workflows/benchmark.yaml index 5cc0ff4411..2f29670fca 100644 --- a/.github/workflows/benchmark.yaml +++ b/.github/workflows/benchmark.yaml @@ -37,6 +37,7 @@ jobs: strategy: matrix: go-version: [ 1.19.10 ] + python-version: [ 3.8 ] runner: [ ubuntu-latest ] fail-fast: true permissions: @@ -62,6 +63,11 @@ jobs: with: go-version: ${{ matrix.go-version }} + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + - name: Check out code uses: actions/checkout@v2 with: @@ -83,6 +89,7 @@ jobs: BUILD_LOGTAIL_UT: OFF WITHOUTGDB: ON run: | + pip3 install -r test/requirements.txt make benchmark git stash diff --git a/core/unittest/pipeline/PipelineUnittest.cpp b/core/unittest/pipeline/PipelineUnittest.cpp index 1cf92420d5..396e84627c 100644 --- a/core/unittest/pipeline/PipelineUnittest.cpp +++ b/core/unittest/pipeline/PipelineUnittest.cpp @@ -2916,6 +2916,7 @@ void PipelineUnittest::TestWaitAllItemsInProcessFinished() const { APSARA_TEST_NOT_EQUAL(std::future_status::ready, future.wait_for(std::chrono::seconds(0))); pipeline->mInProcessCnt.store(0); // recover + usleep(3000); APSARA_TEST_EQUAL(std::future_status::ready, 
future.wait_for(std::chrono::seconds(0))); } diff --git a/docker/Dockerfile.e2e-test b/docker/Dockerfile.e2e-test index 785df4caa3..e2acd47d42 100644 --- a/docker/Dockerfile.e2e-test +++ b/docker/Dockerfile.e2e-test @@ -1,12 +1,11 @@ -FROM golang:1.19 +FROM python:3.8 -RUN go env -w GOPROXY="https://goproxy.cn,direct" RUN mkdir -p /tmp/loongcollector WORKDIR /root COPY . ./loongcollector WORKDIR /root/loongcollector/test -RUN go mod download +RUN pip3 install -r requirements.txt CMD ["sh", "-c", "while true; do sleep 3600; done"] \ No newline at end of file diff --git a/test/benchmark/test_cases/performance_file_to_blackhole_filebeat/case.feature b/test/benchmark/test_cases/performance_file_to_blackhole_filebeat/case.feature index b8bbf86d1a..58e2ec8234 100644 --- a/test/benchmark/test_cases/performance_file_to_blackhole_filebeat/case.feature +++ b/test/benchmark/test_cases/performance_file_to_blackhole_filebeat/case.feature @@ -8,5 +8,5 @@ Feature: performance file to blackhole filebeat Given docker-compose boot type {benchmark} When start docker-compose {performance_file_to_blackhole_filebeat} When start monitor {filebeat} - When generate random nginx logs to file, speed {10}MB/s, total {3}min, to file {./a.log} + When generate random nginx logs to file, speed {10}MB/s, total {3}min, to file {./test_cases/performance_file_to_blackhole_filebeat/a.log} When wait monitor until log processing finished diff --git a/test/benchmark/test_cases/performance_file_to_blackhole_fluentbit/case.feature b/test/benchmark/test_cases/performance_file_to_blackhole_fluentbit/case.feature index 449511f10d..43de7c8c04 100644 --- a/test/benchmark/test_cases/performance_file_to_blackhole_fluentbit/case.feature +++ b/test/benchmark/test_cases/performance_file_to_blackhole_fluentbit/case.feature @@ -8,5 +8,5 @@ Feature: performance file to blackhole fluentbit Given docker-compose boot type {benchmark} When start docker-compose {performance_file_to_blackhole_fluentbit} When start monitor 
{fluent-bit} - When generate random nginx logs to file, speed {10}MB/s, total {3}min, to file {./a.log} + When generate random nginx logs to file, speed {10}MB/s, total {3}min, to file {./test_cases/performance_file_to_blackhole_fluentbit/a.log} When wait monitor until log processing finished diff --git a/test/benchmark/test_cases/performance_file_to_blackhole_ilogtail/case.feature b/test/benchmark/test_cases/performance_file_to_blackhole_ilogtail/case.feature index 92bb93f5d6..67e7913180 100644 --- a/test/benchmark/test_cases/performance_file_to_blackhole_ilogtail/case.feature +++ b/test/benchmark/test_cases/performance_file_to_blackhole_ilogtail/case.feature @@ -8,5 +8,5 @@ Feature: performance file to blackhole iLogtail Given docker-compose boot type {benchmark} When start docker-compose {performance_file_to_blackhole_ilogtail} When start monitor {ilogtailC} - When generate random nginx logs to file, speed {10}MB/s, total {3}min, to file {./a.log} + When generate random nginx logs to file, speed {10}MB/s, total {3}min, to file {./test_cases/performance_file_to_blackhole_ilogtail/a.log} When wait monitor until log processing finished diff --git a/test/benchmark/test_cases/performance_file_to_blackhole_ilogtailspl/case.feature b/test/benchmark/test_cases/performance_file_to_blackhole_ilogtailspl/case.feature index 327b8d27a1..0a2cc6403f 100644 --- a/test/benchmark/test_cases/performance_file_to_blackhole_ilogtailspl/case.feature +++ b/test/benchmark/test_cases/performance_file_to_blackhole_ilogtailspl/case.feature @@ -8,5 +8,5 @@ Feature: performance file to blackhole iLogtail Given docker-compose boot type {benchmark} When start docker-compose {performance_file_to_blackhole_ilogtailspl} When start monitor {ilogtailC} - When generate random nginx logs to file, speed {10}MB/s, total {3}min, to file {./a.log} + When generate random nginx logs to file, speed {10}MB/s, total {3}min, to file {./test_cases/performance_file_to_blackhole_ilogtailspl/a.log} When wait 
monitor until log processing finished diff --git a/test/benchmark/test_cases/performance_file_to_blackhole_vector/case.feature b/test/benchmark/test_cases/performance_file_to_blackhole_vector/case.feature index b0e54b85c1..334b2b3cbb 100644 --- a/test/benchmark/test_cases/performance_file_to_blackhole_vector/case.feature +++ b/test/benchmark/test_cases/performance_file_to_blackhole_vector/case.feature @@ -8,5 +8,5 @@ Feature: performance file to blackhole vector Given docker-compose boot type {benchmark} When start docker-compose {performance_file_to_blackhole_vector} When start monitor {vector} - When generate random nginx logs to file, speed {10}MB/s, total {3}min, to file {./a.log} + When generate random nginx logs to file, speed {10}MB/s, total {3}min, to file {./test_cases/performance_file_to_blackhole_vector/a.log} When wait monitor until log processing finished diff --git a/test/config/config.go b/test/config/config.go index 915bd08c6d..0ddd20c517 100644 --- a/test/config/config.go +++ b/test/config/config.go @@ -15,6 +15,7 @@ package config import ( "os" + "path/filepath" "strconv" "time" @@ -72,6 +73,10 @@ func ParseConfig() { TestConfig.GeneratedLogDir = "/tmp/loongcollector" } TestConfig.WorkDir = os.Getenv("WORK_DIR") + if len(TestConfig.WorkDir) == 0 { + testFileDir, _ := os.Getwd() + TestConfig.WorkDir = filepath.Dir(testFileDir) + } // SSH TestConfig.SSHUsername = os.Getenv("SSH_USERNAME") diff --git a/test/engine/setup/docker_compose.go b/test/engine/setup/docker_compose.go index 6a7e3fc4de..3a78dfb710 100644 --- a/test/engine/setup/docker_compose.go +++ b/test/engine/setup/docker_compose.go @@ -4,6 +4,7 @@ import ( "context" "fmt" "os" + "os/exec" "path/filepath" "time" @@ -128,5 +129,10 @@ func (d *DockerComposeEnv) ExecOnLogtail(command string) (string, error) { } func (d *DockerComposeEnv) ExecOnSource(ctx context.Context, command string) (string, error) { - return "", fmt.Errorf("not implemented") + // exec on host of docker compose + 
fmt.Println(command) + cmd := exec.Command("sh", "-c", command) + output, err := cmd.CombinedOutput() + fmt.Println(string(output)) + return string(output), err } diff --git a/test/engine/steps.go b/test/engine/steps.go index 7b19db1988..336a5ff8ac 100644 --- a/test/engine/steps.go +++ b/test/engine/steps.go @@ -10,8 +10,11 @@ import ( "github.com/alibaba/ilogtail/test/engine/cleanup" "github.com/alibaba/ilogtail/test/engine/control" "github.com/alibaba/ilogtail/test/engine/setup" + "github.com/alibaba/ilogtail/test/engine/setup/monitor" "github.com/alibaba/ilogtail/test/engine/setup/subscriber" "github.com/alibaba/ilogtail/test/engine/trigger" + "github.com/alibaba/ilogtail/test/engine/trigger/ebpf" + "github.com/alibaba/ilogtail/test/engine/trigger/log" "github.com/alibaba/ilogtail/test/engine/verify" ) @@ -27,6 +30,7 @@ func ScenarioInitializer(ctx *godog.ScenarioContext) { ctx.Given(`^remove http config \{(.*)\}`, control.RemoveHTTPConfig) ctx.Given(`^subcribe data from \{(\S+)\} with config`, subscriber.InitSubscriber) ctx.Given(`^mkdir \{(.*)\}`, setup.Mkdir) + ctx.Given(`^docker-compose boot type \{(\S+)\}$`, setup.SetDockerComposeBootType) // ------------------------------------------ // When @@ -41,19 +45,25 @@ func ScenarioInitializer(ctx *godog.ScenarioContext) { // generate ctx.When(`^begin trigger`, trigger.BeginTrigger) - ctx.When(`^generate \{(\d+)\} regex logs to file \{(.*)\}, with interval \{(\d+)\}ms$`, trigger.RegexSingle) - ctx.When(`^generate \{(\d+)\} multiline regex logs to file \{(.*)\}, with interval \{(\d+)\}ms$`, trigger.RegexMultiline) - ctx.When(`^generate \{(\d+)\} regex gbk logs to file \{(.*)\}, with interval \{(\d+)\}ms$`, trigger.RegexSingleGBK) - ctx.When(`^generate \{(\d+)\} http logs, with interval \{(\d+)\}ms, url: \{(.*)\}, method: \{(.*)\}, body:`, trigger.HTTP) - ctx.When(`^generate \{(\d+)\} apsara logs to file \{(.*)\}, with interval \{(\d+)\}ms$`, trigger.Apsara) - ctx.When(`^generate \{(\d+)\} delimiter logs to file 
\{(.*)\}, with interval \{(\d+)\}ms, with delimiter \{(.*)\} and quote \{(.*)\}$`, trigger.DelimiterSingle) - ctx.When(`^generate \{(\d+)\} multiline delimiter logs to file \{(.*)\}, with interval \{(\d+)\}ms, with delimiter \{(.*)\} and quote \{(.*)\}$`, trigger.DelimiterMultiline) - ctx.When(`^generate \{(\d+)\} json logs to file \{(.*)\}, with interval \{(\d+)\}ms$`, trigger.JSONSingle) - ctx.When(`^generate \{(\d+)\} multiline json logs to file \{(.*)\}, with interval \{(\d+)\}ms$`, trigger.JSONMultiline) - ctx.When(`^execute \{(\d+)\} commands to generate process security events`, trigger.TrigerProcessSecurityEvents) - ctx.When(`^execute \{(\d+)\} commands to generate network security events on url \{(.*)\}$`, trigger.TrigerNetworksSecurityEvents) - ctx.When(`^execute \{(\d+)\} commands to generate file security events on files \{(.*)\}$`, trigger.TrigerFileSecurityEvents) - ctx.When(`^generate \{(\d+)\} HTTP requests, with interval \{(\d+)\}ms, url: \{(.*)\}`, trigger.TrigerHTTP) + // log + ctx.When(`^generate \{(\d+)\} regex logs to file \{(.*)\}, with interval \{(\d+)\}ms$`, log.RegexSingle) + ctx.When(`^generate \{(\d+)\} multiline regex logs to file \{(.*)\}, with interval \{(\d+)\}ms$`, log.RegexMultiline) + ctx.When(`^generate \{(\d+)\} regex gbk logs to file \{(.*)\}, with interval \{(\d+)\}ms$`, log.RegexSingleGBK) + ctx.When(`^generate \{(\d+)\} http logs, with interval \{(\d+)\}ms, url: \{(.*)\}, method: \{(.*)\}, body:`, log.HTTP) + ctx.When(`^generate \{(\d+)\} apsara logs to file \{(.*)\}, with interval \{(\d+)\}ms$`, log.Apsara) + ctx.When(`^generate \{(\d+)\} delimiter logs to file \{(.*)\}, with interval \{(\d+)\}ms, with delimiter \{(.*)\} and quote \{(.*)\}$`, log.DelimiterSingle) + ctx.When(`^generate \{(\d+)\} multiline delimiter logs to file \{(.*)\}, with interval \{(\d+)\}ms, with delimiter \{(.*)\} and quote \{(.*)\}$`, log.DelimiterMultiline) + ctx.When(`^generate \{(\d+)\} json logs to file \{(.*)\}, with interval \{(\d+)\}ms$`, 
log.JSONSingle) + ctx.When(`^generate \{(\d+)\} multiline json logs to file \{(.*)\}, with interval \{(\d+)\}ms$`, log.JSONMultiline) + ctx.When(`^generate random nginx logs to file, speed \{(\d+)\}MB/s, total \{(\d+)\}min, to file \{(.*)\}`, log.Nginx) + ctx.When(`^start monitor \{(\S+)\}`, monitor.StartMonitor) + ctx.When(`^wait monitor until log processing finished$`, monitor.WaitMonitorUntilProcessingFinished) + + // ebpf + ctx.When(`^execute \{(\d+)\} commands to generate process security events`, ebpf.ProcessSecurityEvents) + ctx.When(`^execute \{(\d+)\} commands to generate network security events on url \{(.*)\}$`, ebpf.NetworksSecurityEvents) + ctx.When(`^execute \{(\d+)\} commands to generate file security events on files \{(.*)\}$`, ebpf.FileSecurityEvents) + ctx.When(`^generate \{(\d+)\} HTTP requests, with interval \{(\d+)\}ms, url: \{(.*)\}`, ebpf.HTTP) // ------------------------------------------ // Then diff --git a/test/engine/trigger/protocol.go b/test/engine/trigger/ebpf/http.go similarity index 90% rename from test/engine/trigger/protocol.go rename to test/engine/trigger/ebpf/http.go index 646a4aa97b..c3c84f9ddd 100644 --- a/test/engine/trigger/protocol.go +++ b/test/engine/trigger/ebpf/http.go @@ -11,7 +11,7 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
-package trigger +package ebpf import ( "context" @@ -22,7 +22,7 @@ import ( "github.com/alibaba/ilogtail/test/engine/setup" ) -func TrigerHTTP(ctx context.Context, count int, interval int, url string) (context.Context, error) { +func HTTP(ctx context.Context, count int, interval int, url string) (context.Context, error) { logger.Debugf(context.Background(), "count:%d interval:%d url:%s", count, interval, url) cmd := fmt.Sprintf("curl -vL %s", url) time.Sleep(time.Second * 5) diff --git a/test/engine/trigger/ebpf/remote_mmap.py b/test/engine/trigger/ebpf/remote_mmap.py new file mode 100644 index 0000000000..1a1efc5bd8 --- /dev/null +++ b/test/engine/trigger/ebpf/remote_mmap.py @@ -0,0 +1,22 @@ +import argparse +import mmap +import os + +def main(): + parser = argparse.ArgumentParser(description='mmap') + parser.add_argument('--commandCnt', type=int, default=10, help='command count') + parser.add_argument('--filename', type=str, default='/tmp/loongcollector/ebpfFileSecurityHook3.log', help='filename') + + args = parser.parse_args() + + with open(args.filename, 'w') as f: + fd = f.fileno() + for i in range(args.commandCnt): + mm = mmap.mmap(fd, 20, prot=mmap.PROT_READ | mmap.PROT_WRITE, flags=mmap.MAP_SHARED) + mm.close() + + os.remove(args.filename) + + +if __name__ == '__main__': + main() diff --git a/test/engine/trigger/ebpf_trigger.go b/test/engine/trigger/ebpf/security.go similarity index 76% rename from test/engine/trigger/ebpf_trigger.go rename to test/engine/trigger/ebpf/security.go index 4b824c557e..34bfc189c3 100644 --- a/test/engine/trigger/ebpf_trigger.go +++ b/test/engine/trigger/ebpf/security.go @@ -11,16 +11,16 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
-package trigger +package ebpf import ( "context" - "html/template" + "strconv" "strings" "time" - "github.com/alibaba/ilogtail/test/config" "github.com/alibaba/ilogtail/test/engine/setup" + "github.com/alibaba/ilogtail/test/engine/trigger" ) /* @@ -28,7 +28,7 @@ import ( input_process_security ******************** */ -func TrigerProcessSecurityEvents(ctx context.Context, commandCnt int) (context.Context, error) { +func ProcessSecurityEvents(ctx context.Context, commandCnt int) (context.Context, error) { time.Sleep(5 * time.Second) if err := execveCommands(ctx, commandCnt); err != nil { return ctx, err @@ -51,7 +51,7 @@ func execveCommands(ctx context.Context, commandCnt int) error { input_network_security ******************** */ -func TrigerNetworksSecurityEvents(ctx context.Context, commandCnt int, url string) (context.Context, error) { +func NetworksSecurityEvents(ctx context.Context, commandCnt int, url string) (context.Context, error) { time.Sleep(5 * time.Second) if err := curlURL(ctx, commandCnt, url); err != nil { return ctx, err @@ -74,9 +74,8 @@ func curlURL(ctx context.Context, commandCnt int, url string) error { input_file_security ******************** */ -const triggerFileSecurityTemplate = "cd {{.WorkDir}} && COMMAND_CNT={{.CommandCnt}} FILE_NAME={{.FileName}} {{.Command}}" -func TrigerFileSecurityEvents(ctx context.Context, commandCnt int, filenames string) (context.Context, error) { +func FileSecurityEvents(ctx context.Context, commandCnt int, filenames string) (context.Context, error) { time.Sleep(5 * time.Second) if err := rwFile(ctx, commandCnt, filenames); err != nil { return ctx, err @@ -112,20 +111,10 @@ func rwFile(ctx context.Context, commandCnt int, filenames string) error { } func mmapFile(ctx context.Context, commandCnt int, filenames string) error { - mmapFileCommand := getRunTriggerCommand("TestGenerateMmapCommand") files := strings.Split(filenames, ",") for _, file := range files { - var triggerEBPFCommand strings.Builder - template := 
template.Must(template.New("trigger").Parse(triggerFileSecurityTemplate)) - if err := template.Execute(&triggerEBPFCommand, map[string]interface{}{ - "WorkDir": config.TestConfig.WorkDir, - "CommandCnt": commandCnt, - "FileName": file, - "Command": mmapFileCommand, - }); err != nil { - return err - } - if _, err := setup.Env.ExecOnSource(ctx, triggerEBPFCommand.String()); err != nil { + mmapFileCommand := trigger.GetRunTriggerCommand("ebpf", "mmap", "commandCnt", strconv.FormatInt(int64(commandCnt), 10), "filename", file) + if _, err := setup.Env.ExecOnSource(ctx, mmapFileCommand); err != nil { return err } } diff --git a/test/engine/trigger/file.go b/test/engine/trigger/file.go deleted file mode 100644 index 373aebbe52..0000000000 --- a/test/engine/trigger/file.go +++ /dev/null @@ -1,114 +0,0 @@ -package trigger - -import ( - "context" - "fmt" - "math/rand" - "os" - "path/filepath" - "time" - - "golang.org/x/time/rate" - - "github.com/alibaba/ilogtail/test/config" -) - -// JSON template -func GenerateRandomNginxLogToFile(ctx context.Context, speed, totalTime int, path string) (context.Context, error) { - - // clear file - path = filepath.Clean(path) - path = filepath.Join(config.CaseHome, path) - fmt.Println(path) - _ = os.WriteFile(path, []byte{}, 0600) - file, _ := os.OpenFile(path, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0600) // #nosec G304 - - rand.Seed(time.Now().UnixNano()) - maxLogLen := 1024 - nginxLog := genNginxLog() - - limiter := rate.NewLimiter(rate.Limit(speed*1024*1024), maxLogLen) - - timeout := time.After(time.Minute * time.Duration(totalTime)) - - for { - select { - // context is done - case <-ctx.Done(): - // clear file - _ = file.Close() - return ctx, nil - // all time is done - case <-timeout: - // clear file - _ = file.Close() - return ctx, nil - default: - if limiter.AllowN(time.Now(), len(nginxLog)) { - _, _ = file.WriteString(nginxLog + "\n") // #nosec G307 - nginxLog = genNginxLog() - } - } - } -} - -var ipAddresses = []string{ - 
"103.159.151.180", - "12.55.18.241", - "182.233.128.102", - "221.85.57.231", - "76.245.65.224", - "86.250.231.93", - "44.201.253.252", - "218.7.2.219", - "172.118.174.109", - "208.16.46.154", - "7.138.80.41", - "214.73.25.80", - "83.124.20.79", - "80.226.48.153", - "92.129.204.161", - "212.103.145.159", - "148.188.8.90", - "148.212.244.121", - "106.186.172.157", - "30.127.196.158", -} - -var userAgents = []string{ - "aliyun-sdk-java", - "aliyun-sdk-golang", - "aliyun-sdk-python", -} - -var statusCodes = []string{ - "400", - "401", - "402", - "403", - "404", - "200", -} - -const bytesMean = 5500.0 -const bytesStddev = 1500.0 - -func genNginxLog() string { - nginxLogTemplate := `%s - - [%s] "GET http://www.districtdot-com.biz/syndicate HTTP/1.1" %s %d "http://www.chiefscalable.biz/webservices" "%s"` - currentTime := time.Now().Format("02/Jan/2006:15:04:05 +0800") - ipAddress := ipAddresses[rand.Intn(len(ipAddresses))] // #nosec G404 - statusIdx := rand.Intn(len(statusCodes) * 10) // #nosec G404 - if statusIdx >= len(statusCodes) { - statusIdx = len(statusCodes) - 1 - } - bytesSize := int32(rand.NormFloat64()*bytesStddev + bytesMean) - if bytesSize < 1000 { - bytesSize = 0 - } else if bytesSize > 10000 { - bytesSize = 10000 - } - statusCode := statusCodes[statusIdx] - userAgent := userAgents[rand.Intn(len(userAgents))] // #nosec G404 - - return fmt.Sprintf(nginxLogTemplate, ipAddress, currentTime, statusCode, bytesSize, userAgent) -} diff --git a/test/engine/trigger/generator/apsara_test.go b/test/engine/trigger/generator/apsara_test.go deleted file mode 100644 index 40329c0cfe..0000000000 --- a/test/engine/trigger/generator/apsara_test.go +++ /dev/null @@ -1,73 +0,0 @@ -// Copyright 2024 iLogtail Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -package generator - -import ( - "fmt" - "math/rand" - "os" - "strconv" - "testing" - "time" -) - -// TestGenerateApsara will be executed in the environment being collected. -func TestGenerateApsara(t *testing.T) { - config, err := getGenerateFileLogConfigFromEnv() - if err != nil { - t.Fatalf("get generate file log config from env failed: %v", err) - return - } - testLogContentTmpl := string2Template([]string{ - "[{{.Time}}]\t[{{.Level}}]\t[32337]\t[/build/core/application/Application:12]\tfile:file{{.FileNo}}\tlogNo:{{.LogNo}}\tmark:{{.Mark}}\tmsg:hello world!\n", - "[{{.Time}}]\t[{{.Level}}]\t[20964]\t[/build/core/ilogtail.cpp:127]\tfile:file{{.FileNo}}\tlogNo:{{.LogNo}}\tmark:{{.Mark}}\tmsg:这是一条消息\n", - "[{{.Time}}]\t[{{.Level}}]\t[32337]\t[/build/core/ilogtail.cpp:127]\tfile:file{{.FileNo}}\tlogNo:{{.LogNo}}\tmark:{{.Mark}}\tmsg:hello world!\n", - "[{{.Time}}]\t[{{.Level}}]\t[32337]\t[/build/core/ilogtail.cpp:127]\tfile:file{{.FileNo}}\tlogNo:{{.LogNo}}\tmark:{{.Mark}}\tmsg:这是一条消息\n", - "[{{.Time}}]\t[{{.Level}}]\t[00001]\t[/build/core/ilogtail.cpp:127]\tfile:file{{.FileNo}}\tlogNo:{{.LogNo}}\tmark:{{.Mark}}\tmsg:password:123456\n", - }) - file, err := os.OpenFile(fmt.Sprintf("%s/%s", config.GeneratedLogDir, config.FileName), os.O_WRONLY|os.O_CREATE|os.O_APPEND, 0644) - if err != nil { - t.Fatalf("open file failed: %v", err) - return - } - defer file.Close() - - logIndex := 0 - logNo := rand.Intn(10000) - fileNo := rand.Intn(10000) - for i := 0; i < config.TotalLog; i++ { - var currentTime string - if i%2 == 0 { - currentTime = 
time.Now().Format("2006-01-02 15:04:05.000000") - } else { - currentTime = strconv.FormatInt(time.Now().UnixNano()/1000, 10) - } - err = testLogContentTmpl[logIndex].Execute(file, map[string]interface{}{ - "Time": currentTime, - "Level": getRandomLogLevel(), - "LogNo": logNo + i, - "FileNo": fileNo, - "Mark": getRandomMark(), - }) - if err != nil { - t.Fatalf("write log failed: %v", err) - return - } - time.Sleep(time.Duration(config.Interval * int(time.Millisecond))) - logIndex++ - if logIndex >= len(testLogContentTmpl) { - logIndex = 0 - } - } -} diff --git a/test/engine/trigger/generator/delimiter_test.go b/test/engine/trigger/generator/delimiter_test.go deleted file mode 100644 index 354b6dea8c..0000000000 --- a/test/engine/trigger/generator/delimiter_test.go +++ /dev/null @@ -1,127 +0,0 @@ -// Copyright 2024 iLogtail Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -package generator - -import ( - "fmt" - "math/rand" - "os" - "testing" - "time" -) - -// TestGenerateDelimiterSingle will be executed in the environment being collected. 
-func TestGenerateDelimiterSingle(t *testing.T) { - config, err := getGenerateFileLogConfigFromEnv("Delimiter", "Quote") - if err != nil { - t.Fatalf("get generate file log config from env failed: %v", err) - return - } - delimiter := config.Custom["Delimiter"] - if delimiter == "" { - delimiter = " " - } - quote := config.Custom["Quote"] - if quote == "" { - quote = "" - } - testLogContentTmpl := string2Template([]string{ - "{{.Quote}}{{.Mark}}{{.Quote}}{{.Delimiter}}{{.Quote}}file{{.FileNo}}{{.Quote}}{{.Delimiter}}{{.Quote}}{{.LogNo}}{{.Quote}}{{.Delimiter}}{{.Quote}}0.0.0.0{{.Quote}}{{.Delimiter}}{{.Quote}}{{.Time}}{{.Quote}}{{.Delimiter}}{{.Quote}}GET{{.Quote}}{{.Delimiter}}{{.Quote}}/index.html{{.Quote}}{{.Delimiter}}{{.Quote}}HTTP/2.0{{.Quote}}{{.Delimiter}}{{.Quote}}302{{.Quote}}{{.Delimiter}}{{.Quote}}628{{.Quote}}{{.Delimiter}}{{.Quote}}curl/7.10{{.Quote}}\n", - "{{.Quote}}{{.Mark}}{{.Quote}}{{.Delimiter}}{{.Quote}}file{{.FileNo}}{{.Quote}}{{.Delimiter}}{{.Quote}}{{.LogNo}}{{.Quote}}{{.Delimiter}}{{.Quote}}10.45.26.0{{.Quote}}{{.Delimiter}}{{.Quote}}{{.Time}}{{.Quote}}{{.Delimiter}}{{.Quote}}GET{{.Quote}}{{.Delimiter}}{{.Quote}}/{{.Quote}}{{.Delimiter}}{{.Quote}}HTTP/2.0{{.Quote}}{{.Delimiter}}{{.Quote}}302{{.Quote}}{{.Delimiter}}{{.Quote}}218{{.Quote}}{{.Delimiter}}{{.Quote}}go-sdk{{.Quote}}\n", - "{{.Quote}}{{.Mark}}{{.Quote}}{{.Delimiter}}{{.Quote}}file{{.FileNo}}{{.Quote}}{{.Delimiter}}{{.Quote}}{{.LogNo}}{{.Quote}}{{.Delimiter}}{{.Quote}}10.45.26.0{{.Quote}}{{.Delimiter}}{{.Quote}}{{.Time}}{{.Quote}}{{.Delimiter}}{{.Quote}}GET{{.Quote}}{{.Delimiter}}{{.Quote}}/dir/resource.txt{{.Quote}}{{.Delimiter}}{{.Quote}}HTTP/1.1{{.Quote}}{{.Delimiter}}{{.Quote}}404{{.Quote}}{{.Delimiter}}{{.Quote}}744{{.Quote}}{{.Delimiter}}{{.Quote}}Mozilla/5.0{{.Quote}}\n", - 
"{{.Quote}}{{.Mark}}{{.Quote}}{{.Delimiter}}{{.Quote}}file{{.FileNo}}{{.Quote}}{{.Delimiter}}{{.Quote}}{{.LogNo}}{{.Quote}}{{.Delimiter}}{{.Quote}}127.0.0.1{{.Quote}}{{.Delimiter}}{{.Quote}}{{.Time}}{{.Quote}}{{.Delimiter}}{{.Quote}}PUT{{.Quote}}{{.Delimiter}}{{.Quote}}/{{.Quote}}{{.Delimiter}}{{.Quote}}HTTP/2.0{{.Quote}}{{.Delimiter}}{{.Quote}}200{{.Quote}}{{.Delimiter}}{{.Quote}}320{{.Quote}}{{.Delimiter}}{{.Quote}}curl/7.10{{.Quote}}\n", - "{{.Quote}}{{.Mark}}{{.Quote}}{{.Delimiter}}{{.Quote}}file{{.FileNo}}{{.Quote}}{{.Delimiter}}{{.Quote}}{{.LogNo}}{{.Quote}}{{.Delimiter}}{{.Quote}}192.168.0.3{{.Quote}}{{.Delimiter}}{{.Quote}}{{.Time}}{{.Quote}}{{.Delimiter}}{{.Quote}}PUT{{.Quote}}{{.Delimiter}}{{.Quote}}/dir/resource.txt{{.Quote}}{{.Delimiter}}{{.Quote}}HTTP/1.1{{.Quote}}{{.Delimiter}}{{.Quote}}404{{.Quote}}{{.Delimiter}}{{.Quote}}949{{.Quote}}{{.Delimiter}}{{.Quote}}curl/7.10{{.Quote}}\n", - }) - file, err := os.OpenFile(fmt.Sprintf("%s/%s", config.GeneratedLogDir, config.FileName), os.O_WRONLY|os.O_CREATE|os.O_APPEND, 0644) - if err != nil { - t.Fatalf("open file failed: %v", err) - return - } - defer file.Close() - - logIndex := 0 - logNo := rand.Intn(10000) - fileNo := rand.Intn(10000) - for i := 0; i < config.TotalLog; i++ { - err = testLogContentTmpl[logIndex].Execute(file, map[string]interface{}{ - "Mark": getRandomMark(), - "FileNo": fileNo, - "LogNo": logNo, - "Time": time.Now().Format("2006-01-02 15:04:05.000000000"), - "Delimiter": delimiter, - "Quote": quote, - }) - if err != nil { - t.Fatalf("write log failed: %v", err) - return - } - time.Sleep(time.Duration(config.Interval * int(time.Millisecond))) - logIndex++ - if logIndex >= len(testLogContentTmpl) { - logIndex = 0 - } - } -} - -// TestGenerateDelimiterMultiline will be executed in the environment being collected. 
-func TestGenerateDelimiterMultiline(t *testing.T) { - config, err := getGenerateFileLogConfigFromEnv("Delimiter", "Quote") - if err != nil { - t.Fatalf("get generate file log config from env failed: %v", err) - return - } - delimiter := config.Custom["Delimiter"] - if delimiter == "" { - delimiter = " " - } - quote := config.Custom["Quote"] - if quote == "" { - quote = "" - } - testLogContentTmpl := string2Template([]string{ - "{{.Quote}}F{{.Quote}}{{.Delimiter}}{{.Quote}}file{{.FileNo}}{{.Quote}}{{.Delimiter}}{{.Quote}}{{.LogNo}}{{.Quote}}{{.Delimiter}}{{.Quote}}0.0.0.0{{.Quote}}{{.Delimiter}}{{.Quote}}{{.Time}}{{.Quote}}{{.Delimiter}}{{.Quote}}GET{{.Quote}}{{.Delimiter}}{{.Quote}}/index.html{{.Quote}}{{.Delimiter}}{{.Quote}}\nHTTP\n/2.0{{.Quote}}{{.Delimiter}}{{.Quote}}302{{.Quote}}{{.Delimiter}}{{.Quote}}628{{.Quote}}{{.Delimiter}}{{.Quote}}curl/7.10{{.Quote}}\n", - "{{.Quote}}-{{.Quote}}{{.Delimiter}}{{.Quote}}file{{.FileNo}}{{.Quote}}{{.Delimiter}}{{.Quote}}{{.LogNo}}{{.Quote}}{{.Delimiter}}{{.Quote}}10.45.26.0{{.Quote}}{{.Delimiter}}{{.Quote}}{{.Time}}{{.Quote}}{{.Delimiter}}{{.Quote}}GET{{.Quote}}{{.Delimiter}}{{.Quote}}/{{.Quote}}{{.Delimiter}}{{.Quote}}\nHTTP\n/2.0{{.Quote}}{{.Delimiter}}{{.Quote}}302{{.Quote}}{{.Delimiter}}{{.Quote}}218{{.Quote}}{{.Delimiter}}{{.Quote}}go-sdk{{.Quote}}\n", - "{{.Quote}}F{{.Quote}}{{.Delimiter}}{{.Quote}}file{{.FileNo}}{{.Quote}}{{.Delimiter}}{{.Quote}}{{.LogNo}}{{.Quote}}{{.Delimiter}}{{.Quote}}10.45.26.0{{.Quote}}{{.Delimiter}}{{.Quote}}{{.Time}}{{.Quote}}{{.Delimiter}}{{.Quote}}GET{{.Quote}}{{.Delimiter}}{{.Quote}}/dir/resource.txt{{.Quote}}{{.Delimiter}}{{.Quote}}\nHTTP\n/1.1{{.Quote}}{{.Delimiter}}{{.Quote}}404{{.Quote}}{{.Delimiter}}{{.Quote}}744{{.Quote}}{{.Delimiter}}{{.Quote}}Mozilla/5.0{{.Quote}}\n", - 
"{{.Quote}}-{{.Quote}}{{.Delimiter}}{{.Quote}}file{{.FileNo}}{{.Quote}}{{.Delimiter}}{{.Quote}}{{.LogNo}}{{.Quote}}{{.Delimiter}}{{.Quote}}127.0.0.1{{.Quote}}{{.Delimiter}}{{.Quote}}{{.Time}}{{.Quote}}{{.Delimiter}}{{.Quote}}PUT{{.Quote}}{{.Delimiter}}{{.Quote}}/{{.Quote}}{{.Delimiter}}{{.Quote}}\nHTTP\n/2.0{{.Quote}}{{.Delimiter}}{{.Quote}}200{{.Quote}}{{.Delimiter}}{{.Quote}}320{{.Quote}}{{.Delimiter}}{{.Quote}}curl/7.10{{.Quote}}\n", - "{{.Quote}}F{{.Quote}}{{.Delimiter}}{{.Quote}}file{{.FileNo}}{{.Quote}}{{.Delimiter}}{{.Quote}}{{.LogNo}}{{.Quote}}{{.Delimiter}}{{.Quote}}192.168.0.3{{.Quote}}{{.Delimiter}}{{.Quote}}{{.Time}}{{.Quote}}{{.Delimiter}}{{.Quote}}PUT{{.Quote}}{{.Delimiter}}{{.Quote}}/dir/resource.txt{{.Quote}}{{.Delimiter}}{{.Quote}}\nHTTP\n/1.1{{.Quote}}{{.Delimiter}}{{.Quote}}404{{.Quote}}{{.Delimiter}}{{.Quote}}949{{.Quote}}{{.Delimiter}}{{.Quote}}curl/7.10{{.Quote}}\n", - }) - file, err := os.OpenFile(fmt.Sprintf("%s/%s", config.GeneratedLogDir, config.FileName), os.O_WRONLY|os.O_CREATE|os.O_APPEND, 0644) - if err != nil { - t.Fatalf("open file failed: %v", err) - return - } - defer file.Close() - - logIndex := 0 - logNo := rand.Intn(10000) - fileNo := rand.Intn(10000) - for i := 0; i < config.TotalLog; i++ { - err = testLogContentTmpl[logIndex].Execute(file, map[string]interface{}{ - "FileNo": fileNo, - "LogNo": logNo, - "Time": time.Now().Format("2006-01-02 15:04:05.000000000"), - "Delimiter": delimiter, - "Quote": quote, - }) - if err != nil { - t.Fatalf("write log failed: %v", err) - return - } - time.Sleep(time.Duration(config.Interval * int(time.Millisecond))) - logIndex++ - if logIndex >= len(testLogContentTmpl) { - logIndex = 0 - } - } -} diff --git a/test/engine/trigger/generator/ebpf_file_mmap_test.go b/test/engine/trigger/generator/ebpf_file_mmap_test.go deleted file mode 100644 index 56e221b54d..0000000000 --- a/test/engine/trigger/generator/ebpf_file_mmap_test.go +++ /dev/null @@ -1,51 +0,0 @@ -// Copyright 2024 iLogtail Authors -// 
-// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -package generator - -import ( - "os" - "strconv" - "syscall" - "testing" -) - -func TestGenerateMmapCommand(t *testing.T) { - commandCnt := getEnvOrDefault("COMMAND_CNT", "10") - commandCntNum, err := strconv.Atoi(commandCnt) - if err != nil { - t.Fatalf("parse COMMAND_CNT failed: %v", err) - return - } - filename := getEnvOrDefault("FILE_NAME", "/tmp/loongcollector/ebpfFileSecurityHook3.log") - f, err := os.Create(filename) - if err != nil { - panic(err) - } - fd := int(f.Fd()) - for i := 0; i < commandCntNum; i++ { - b, innerErr := syscall.Mmap(fd, 0, 20, syscall.PROT_READ|syscall.PROT_WRITE, syscall.MAP_SHARED) - if innerErr != nil { - panic(innerErr) - } - innerErr = syscall.Munmap(b) - if innerErr != nil { - panic(innerErr) - } - } - err = os.Remove(filename) - if err != nil { - t.Fatalf("remove file failed: %v", err) - return - } -} diff --git a/test/engine/trigger/generator/helper.go b/test/engine/trigger/generator/helper.go deleted file mode 100644 index 2514bb2614..0000000000 --- a/test/engine/trigger/generator/helper.go +++ /dev/null @@ -1,85 +0,0 @@ -// Copyright 2024 iLogtail Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -package generator - -import ( - "crypto/rand" - "fmt" - "math/big" - "os" - "strconv" - "text/template" - - "github.com/pkg/errors" -) - -var Levels = []string{"ERROR", "INFO", "DEBUG", "WARNING"} - -type GenerateFileLogConfig struct { - GeneratedLogDir string - TotalLog int - Interval int - FileName string - Custom map[string]string -} - -func getGenerateFileLogConfigFromEnv(customKeys ...string) (*GenerateFileLogConfig, error) { - gneratedLogDir := getEnvOrDefault("GENERATED_LOG_DIR", "/tmp/loongcollector") - totalLog, err := strconv.Atoi(getEnvOrDefault("TOTAL_LOG", "100")) - if err != nil { - return nil, errors.Wrap(err, "parse TOTAL_LOG failed") - } - interval, err := strconv.Atoi(getEnvOrDefault("INTERVAL", "1")) - if err != nil { - return nil, errors.Wrap(err, "parse INTERVAL failed") - } - fileName := getEnvOrDefault("FILENAME", "default.log") - custom := make(map[string]string) - for _, key := range customKeys { - custom[key] = getEnvOrDefault(key, "") - } - return &GenerateFileLogConfig{ - GeneratedLogDir: gneratedLogDir, - TotalLog: totalLog, - Interval: interval, - FileName: fileName, - Custom: custom, - }, nil -} - -func string2Template(strings []string) []*template.Template { - templates := make([]*template.Template, len(strings)) - for i, str := range strings { - templates[i], _ = template.New(fmt.Sprintf("template_%d", i)).Parse(str) - } - return templates -} - -func getRandomLogLevel() string { - randInt, _ := rand.Int(rand.Reader, big.NewInt(int64(len(Levels)))) - return Levels[randInt.Int64()] -} - -func getRandomMark() string { 
- marks := []string{"-", "F"} - randInt, _ := rand.Int(rand.Reader, big.NewInt(int64(len(marks)))) - return marks[randInt.Int64()] -} - -func getEnvOrDefault(env, fallback string) string { - if value, ok := os.LookupEnv(env); ok { - return value - } - return fallback -} diff --git a/test/engine/trigger/generator/json_test.go b/test/engine/trigger/generator/json_test.go deleted file mode 100644 index 0b4ef51b33..0000000000 --- a/test/engine/trigger/generator/json_test.go +++ /dev/null @@ -1,119 +0,0 @@ -// Copyright 2024 iLogtail Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -package generator - -import ( - "fmt" - "math/rand" - "os" - "strconv" - "testing" - "time" -) - -// TestGenerateJSONSingle will be executed in the environment being collected. 
-func TestGenerateJSONSingle(t *testing.T) { - config, err := getGenerateFileLogConfigFromEnv() - if err != nil { - t.Fatalf("get generate file log config from env failed: %v", err) - return - } - testLogContentTmpl := string2Template([]string{ - `{"mark":"{{.Mark}}","file":"file{{.FileNo}}","logNo":{{.LogNo}},"time":"{{.Time}}","ip":"0.0.0.0","method":"POST","userAgent":"mozilla firefox","size":263} -`, - `{"mark":"{{.Mark}}","file":"file{{.FileNo}}","logNo":{{.LogNo}},"time":"{{.Time}}","ip":"0.0.0.0","method":"GET","userAgent":"go-sdk","size":569} -`, - `{"mark":"{{.Mark}}","file":"file{{.FileNo}}","logNo":{{.LogNo}},"time":"{{.Time}}","ip":"0.0.0.0","method":"HEAD","userAgent":"go-sdk","size":210} -`, - `{"mark":"{{.Mark}}","file":"file{{.FileNo}}","logNo":{{.LogNo}},"time":"{{.Time}}","ip":"192.168.0.3","method":"PUT","userAgent":"curl/7.10","size":267} -`, - }) - file, err := os.OpenFile(fmt.Sprintf("%s/%s", config.GeneratedLogDir, config.FileName), os.O_WRONLY|os.O_CREATE|os.O_APPEND, 0644) - if err != nil { - t.Fatalf("open file failed: %v", err) - return - } - defer file.Close() - - logIndex := 0 - logNo := rand.Intn(10000) - fileNo := rand.Intn(10000) - for i := 0; i < config.TotalLog; i++ { - var currentTime string - if i%2 == 0 { - currentTime = time.Now().Format("2006-01-02T15:04:05.999999999") - } else { - currentTime = strconv.FormatInt(time.Now().UnixNano()/1000, 10) - } - testLogContentTmpl[logIndex].Execute(file, map[string]interface{}{ - "Mark": getRandomMark(), - "FileNo": fileNo, - "LogNo": logNo + i, - "Time": currentTime, - }) - - time.Sleep(time.Duration(config.Interval * int(time.Millisecond))) - logIndex++ - if logIndex >= len(testLogContentTmpl) { - logIndex = 0 - } - } -} - -func TestGenerateJSONMultiline(t *testing.T) { - config, err := getGenerateFileLogConfigFromEnv() - if err != nil { - t.Fatalf("get generate file log config from env failed: %v", err) - return - } - testLogContentTmpl := string2Template([]string{ - 
`{"mark":"{{.Mark}}","file":"file{{.FileNo}}","logNo":{{.LogNo}},"time":"{{.Time}}","ip":"0.0.0.0","method":"POST","userAgent":"mozilla firefox", -"size":263} -`, - `{"mark":"{{.Mark}}","file":"file{{.FileNo}}","logNo":{{.LogNo}},"time":"{{.Time}}","ip":"0.0.0.0","method":"GET","userAgent":"go-sdk", -"size":569} -`, - `{"mark":"{{.Mark}}","file":"file{{.FileNo}}","logNo":{{.LogNo}},"time":"{{.Time}}","ip":"0.0.0.0","method":"HEAD","userAgent":"go-sdk", -"size":210} -`, - `{"mark":"{{.Mark}}","file":"file{{.FileNo}}","logNo":{{.LogNo}},"time":"{{.Time}}","ip":"192.168.0.3","method":"PUT","userAgent":"curl/7.10", -"size":267} -`, - }) - file, err := os.OpenFile(fmt.Sprintf("%s/%s", config.GeneratedLogDir, config.FileName), os.O_WRONLY|os.O_CREATE|os.O_APPEND, 0644) - if err != nil { - t.Fatalf("open file failed: %v", err) - return - } - defer file.Close() - - logIndex := 0 - logNo := rand.Intn(10000) - fileNo := rand.Intn(10000) - for i := 0; i < config.TotalLog; i++ { - currentTime := time.Now().Format("2006-01-02T15:04:05.999999999") - testLogContentTmpl[logIndex].Execute(file, map[string]interface{}{ - "Mark": getRandomMark(), - "FileNo": fileNo, - "LogNo": logNo + i, - "Time": currentTime, - }) - - time.Sleep(time.Duration(config.Interval * int(time.Millisecond))) - logIndex++ - if logIndex >= len(testLogContentTmpl) { - logIndex = 0 - } - } -} diff --git a/test/engine/trigger/generator/regex_test.go b/test/engine/trigger/generator/regex_test.go deleted file mode 100644 index c50e65dc6e..0000000000 --- a/test/engine/trigger/generator/regex_test.go +++ /dev/null @@ -1,198 +0,0 @@ -// Copyright 2024 iLogtail Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -package generator - -import ( - "bytes" - "fmt" - "io" - "math/rand" - "os" - "testing" - "time" - - "golang.org/x/text/encoding/simplifiedchinese" - "golang.org/x/text/transform" -) - -// TestGenerateRegexLogSingle will be executed in the environment being collected. -func TestGenerateRegexLogSingle(t *testing.T) { - config, err := getGenerateFileLogConfigFromEnv() - if err != nil { - t.Fatalf("get config failed: %v", err) - return - } - testLogContentTmpl := string2Template([]string{ - `{{.Mark}} file{{.FileNo}}:{{.LogNo}} 127.0.0.1 - [{{.Time}}] "HEAD / HTTP/2.0" 302 809 "未知" "这是一条消息,password:123456" -`, - `{{.Mark}} file{{.FileNo}}:{{.LogNo}} 127.0.0.1 - [{{.Time}}] "GET /index.html HTTP/2.0" 200 139 "Mozilla/5.0" "这是一条消息,password:123456,这是第二条消息,password:00000" -`, - `{{.Mark}} file{{.FileNo}}:{{.LogNo}} 10.45.26.0 - [{{.Time}}] "PUT /index.html HTTP/1.1" 200 913 "curl/7.10" "这是一条消息" -`, - `{{.Mark}} file{{.FileNo}}:{{.LogNo}} 192.168.0.3 - [{{.Time}}] "PUT /dir/resource.txt HTTP/2.0" 501 355 "go-sdk" "这是一条消息,password:123456" -`, - }) - file, err := os.OpenFile(fmt.Sprintf("%s/%s", config.GeneratedLogDir, config.FileName), os.O_WRONLY|os.O_CREATE|os.O_APPEND, 0644) - if err != nil { - t.Fatalf("open file failed: %v", err) - return - } - defer file.Close() - - logIndex := 0 - logNo := rand.Intn(10000) - fileNo := rand.Intn(10000) - location, err := time.LoadLocation("Asia/Shanghai") - if err != nil { - t.Fatalf("load location failed: %v", err) - return - } - for i := 0; i < config.TotalLog; i++ { - err = 
testLogContentTmpl[logIndex].Execute(file, map[string]interface{}{ - "Time": time.Now().In(location).Format("2006-01-02T15:04:05.000000"), - "Mark": getRandomMark(), - "FileNo": fileNo, - "LogNo": logNo + i, - }) - if err != nil { - t.Fatalf("write log failed: %v", err) - return - } - time.Sleep(time.Duration(config.Interval * int(time.Millisecond))) - logIndex++ - if logIndex >= len(testLogContentTmpl) { - logIndex = 0 - } - } -} - -// TestGenerateRegexLogSingleGBK will be executed in the environment being collected. -func TestGenerateRegexLogSingleGBK(t *testing.T) { - config, err := getGenerateFileLogConfigFromEnv() - if err != nil { - t.Fatalf("get config failed: %v", err) - return - } - encoder := simplifiedchinese.GBK.NewEncoder() - testLogContentTmpl := string2Template([]string{ - `{{.Mark}} file{{.FileNo}}:{{.LogNo}} 127.0.0.1 - [{{.Time}}] "HEAD / HTTP/2.0" 302 809 "未知" "这是一条消息,password:123456" -`, - `{{.Mark}} file{{.FileNo}}:{{.LogNo}} 127.0.0.1 - [{{.Time}}] "GET /index.html HTTP/2.0" 200 139 "Mozilla/5.0" "这是一条消息,password:123456,这是第二条消息,password:00000" -`, - `{{.Mark}} file{{.FileNo}}:{{.LogNo}} 10.45.26.0 - [{{.Time}}] "PUT /index.html HTTP/1.1" 200 913 "curl/7.10" "这是一条消息" -`, - `{{.Mark}} file{{.FileNo}}:{{.LogNo}} 192.168.0.3 - [{{.Time}}] "PUT /dir/resource.txt HTTP/2.0" 501 355 "go-sdk" "这是一条消息,password:123456" -`, - }) - file, err := os.OpenFile(fmt.Sprintf("%s/%s", config.GeneratedLogDir, config.FileName), os.O_WRONLY|os.O_CREATE|os.O_APPEND, 0644) - if err != nil { - t.Fatalf("open file failed: %v", err) - return - } - defer file.Close() - - logIndex := 0 - logNo := rand.Intn(10000) - fileNo := rand.Intn(10000) - location, err := time.LoadLocation("Asia/Shanghai") - if err != nil { - t.Fatalf("load location failed: %v", err) - return - } - for i := 0; i < config.TotalLog; i++ { - var buffer bytes.Buffer - _ = testLogContentTmpl[logIndex].Execute(&buffer, map[string]interface{}{ - "Time": 
time.Now().In(location).Format("2006-01-02T15:04:05.000000"), - "Mark": getRandomMark(), - "FileNo": fileNo, - "LogNo": logNo + i, - }) - data, err1 := io.ReadAll(transform.NewReader(&buffer, encoder)) - if err1 != nil { - t.Fatalf("encode log failed: %v", err1) - } - _, err := io.WriteString(file, string(data)) - if err != nil { - t.Fatalf("write log failed: %v", err) - return - } - time.Sleep(time.Duration(config.Interval * int(time.Millisecond))) - logIndex++ - if logIndex >= len(testLogContentTmpl) { - logIndex = 0 - } - } -} - -func TestGenerateRegexLogMultiline(t *testing.T) { - config, err := getGenerateFileLogConfigFromEnv() - if err != nil { - t.Fatalf("get config failed: %v", err) - return - } - testLogContentTmpl := string2Template([]string{ - `{{.Mark}} file{{.FileNo}}:{{.LogNo}} [{{.Time}}] [{{.Level}}] java.lang.Exception: exception happened -at com.aliyun.sls.devops.logGenerator.type.RegexMultiLog.f1(RegexMultiLog.java:73) -at com.aliyun.sls.devops.logGenerator.type.RegexMultiLog.run(RegexMultiLog.java:34) -at java.base/java.lang.Thread.run(Thread.java:833) -`, - `{{.Mark}} file{{.FileNo}}:{{.LogNo}} [{{.Time}}] [{{.Level}}] java.lang.Exception: 发生异常 -at com.aliyun.sls.devops.logGenerator.type.RegexMultiLog.f2(RegexMultiLog.java:80) -at com.aliyun.sls.devops.logGenerator.type.RegexMultiLog.f1(RegexMultiLog.java:75) -at com.aliyun.sls.devops.logGenerator.type.RegexMultiLog.run(RegexMultiLog.java:34) -at java.base/java.lang.Thread.run(Thread.java:833) -`, - `{{.Mark}} file{{.FileNo}}:{{.LogNo}} [{{.Time}}] [{{.Level}}] java.lang.Exception: exception happened -at com.aliyun.sls.devops.logGenerator.type.RegexMultiLog.f5(RegexMultiLog.java:100) -at com.aliyun.sls.devops.logGenerator.type.RegexMultiLog.f4(RegexMultiLog.java:96) -at com.aliyun.sls.devops.logGenerator.type.RegexMultiLog.f3(RegexMultiLog.java:89) -at com.aliyun.sls.devops.logGenerator.type.RegexMultiLog.f2(RegexMultiLog.java:82) -at 
com.aliyun.sls.devops.logGenerator.type.RegexMultiLog.f1(RegexMultiLog.java:75) -at com.aliyun.sls.devops.logGenerator.type.RegexMultiLog.run(RegexMultiLog.java:34) -at java.base/java.lang.Thread.run(Thread.java:833) -`, - }) - file, err := os.OpenFile(fmt.Sprintf("%s/%s", config.GeneratedLogDir, config.FileName), os.O_WRONLY|os.O_CREATE|os.O_APPEND, 0644) - if err != nil { - t.Fatalf("open file failed: %v", err) - return - } - defer file.Close() - logIndex := 0 - logNo := rand.Intn(10000) - fileNo := rand.Intn(10000) - location, err := time.LoadLocation("Asia/Shanghai") - if err != nil { - t.Fatalf("load location failed: %v", err) - return - } - for i := 0; i < config.TotalLog; i++ { - err = testLogContentTmpl[logIndex].Execute(file, map[string]interface{}{ - "Time": time.Now().In(location).Format("2006-01-02T15:04:05.000000"), - "Level": getRandomLogLevel(), - "FileNo": fileNo, - "LogNo": logNo + i, - "Mark": getRandomMark(), - }) - if err != nil { - t.Fatalf("write log failed: %v", err) - return - } - time.Sleep(time.Duration(config.Interval * int(time.Millisecond))) - logIndex++ - if logIndex >= len(testLogContentTmpl) { - logIndex = 0 - } - } -} diff --git a/test/engine/trigger/helper.go b/test/engine/trigger/helper.go index 8e0fb12171..696c63d0b2 100644 --- a/test/engine/trigger/helper.go +++ b/test/engine/trigger/helper.go @@ -14,11 +14,26 @@ package trigger import ( + "context" "fmt" + "path/filepath" + "strings" + "time" + + "github.com/alibaba/ilogtail/test/config" ) -const commandTemplate = "/usr/local/go/bin/go test -count=1 -v -run ^%s$ github.com/alibaba/ilogtail/test/engine/trigger/generator" +const commandTemplate = "python3 %s.py %s" + +func BeginTrigger(ctx context.Context) (context.Context, error) { + startTime := time.Now().Unix() + return context.WithValue(ctx, config.StartTimeContextKey, int32(startTime)), nil +} -func getRunTriggerCommand(triggerName string) string { - return fmt.Sprintf(commandTemplate, triggerName) +func 
GetRunTriggerCommand(scenrio, triggerName string, kvs ...string) string { + args := make([]string, 0) + for i := 0; i < len(kvs); i += 2 { + args = append(args, fmt.Sprintf("--%s", kvs[i]), kvs[i+1]) + } + return fmt.Sprintf(commandTemplate, filepath.Join(config.TestConfig.WorkDir, "engine", "trigger", scenrio, "remote_"+triggerName), strings.Join(args, " ")) } diff --git a/test/engine/trigger/log/file.go b/test/engine/trigger/log/file.go new file mode 100644 index 0000000000..ddea428021 --- /dev/null +++ b/test/engine/trigger/log/file.go @@ -0,0 +1,105 @@ +// Copyright 2024 iLogtail Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+package log + +import ( + "context" + "encoding/json" + "fmt" + "strconv" + "strings" + "time" + + "github.com/alibaba/ilogtail/test/engine/setup" + "github.com/alibaba/ilogtail/test/engine/trigger" +) + +func RegexSingle(ctx context.Context, totalLog int, path string, interval int) (context.Context, error) { + return generate(ctx, "regex", path, totalLog, interval) +} + +func RegexSingleGBK(ctx context.Context, totalLog int, path string, interval int) (context.Context, error) { + return generate(ctx, "regexGBK", path, totalLog, interval) +} + +func RegexMultiline(ctx context.Context, totalLog int, path string, interval int) (context.Context, error) { + return generate(ctx, "regexMultiline", path, totalLog, interval) +} + +func JSONSingle(ctx context.Context, totalLog int, path string, interval int) (context.Context, error) { + return generate(ctx, "json", path, totalLog, interval) +} + +func JSONMultiline(ctx context.Context, totalLog int, path string, interval int) (context.Context, error) { + return generate(ctx, "jsonMultiline", path, totalLog, interval) +} + +func Apsara(ctx context.Context, totalLog int, path string, interval int) (context.Context, error) { + return generate(ctx, "apsara", path, totalLog, interval) +} + +func DelimiterSingle(ctx context.Context, totalLog int, path string, interval int, delimiter, quote string) (context.Context, error) { + return generate(ctx, "delimiter", path, totalLog, interval, "delimiter", delimiter, "quote", quote) +} + +func DelimiterMultiline(ctx context.Context, totalLog int, path string, interval int, delimiter, quote string) (context.Context, error) { + return generate(ctx, "delimiterMultiline", path, totalLog, interval, "delimiter", delimiter, "quote", quote) +} + +func Nginx(ctx context.Context, rate, duration int, path string) (context.Context, error) { + return generateBenchmark(ctx, "nginx", path, rate, duration) +} + +func generate(ctx context.Context, mode, path string, count, interval int, customKV 
...string) (context.Context, error) { + time.Sleep(3 * time.Second) + customKVString := make(map[string]string) + for i := 0; i < len(customKV); i += 2 { + customKVString[customKV[i]] = customKV[i+1] + } + jsonStr, err := json.Marshal(customKVString) + if err != nil { + return ctx, err + } + command := trigger.GetRunTriggerCommand("log", "file", "mode", mode, "path", path, "count", strconv.Itoa(count), "interval", strconv.Itoa(interval), "custom", wrapperCustomArgs(string(jsonStr))) + fmt.Println(command) + go func() { + if _, err := setup.Env.ExecOnSource(ctx, command); err != nil { + fmt.Println(err) + } + }() + return ctx, nil +} + +func generateBenchmark(ctx context.Context, mode, path string, rate, duration int, customKV ...string) (context.Context, error) { + time.Sleep(3 * time.Second) + customKVString := make(map[string]string) + for i := 0; i < len(customKV); i += 2 { + customKVString[customKV[i]] = customKV[i+1] + } + jsonStr, err := json.Marshal(customKVString) + if err != nil { + return ctx, err + } + command := trigger.GetRunTriggerCommand("log", "file_benchmark", "mode", mode, "path", path, "rate", strconv.Itoa(rate), "duration", strconv.Itoa(duration), "custom", wrapperCustomArgs(string(jsonStr))) + if _, err := setup.Env.ExecOnSource(ctx, command); err != nil { + return ctx, err + } + return ctx, nil +} + +func wrapperCustomArgs(customArgs string) string { + fmt.Println(customArgs) + customArgs = strings.ReplaceAll(customArgs, "\\", "\\\\") + return "\"" + strings.ReplaceAll(customArgs, "\"", "\\\"") + "\"" +} diff --git a/test/engine/trigger/http.go b/test/engine/trigger/log/http_server.go similarity index 99% rename from test/engine/trigger/http.go rename to test/engine/trigger/log/http_server.go index 4a580e4c16..d2eec5b961 100644 --- a/test/engine/trigger/http.go +++ b/test/engine/trigger/log/http_server.go @@ -11,7 +11,7 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
// See the License for the specific language governing permissions and // limitations under the License. -package trigger +package log import ( "context" diff --git a/test/engine/trigger/log/remote_file.py b/test/engine/trigger/log/remote_file.py new file mode 100644 index 0000000000..1415c48e36 --- /dev/null +++ b/test/engine/trigger/log/remote_file.py @@ -0,0 +1,172 @@ +import argparse +import json as jsonlib +import logging +import random +import time + +from logging.handlers import TimedRotatingFileHandler +from datetime import datetime +from faker import Faker +from faker.providers import internet, user_agent, lorem, misc + + +def apsara(args, logger, faker): + fileNo = random.randint(1, 1000) + for i in range(args.count): + logger.info(f'[{datetime.now().strftime("%Y-%m-%d %H:%M:%S.%f")}]\t[{get_random_level()}]\t[{random.randint(1, 10000)}]\t[/build/core/application/Application:{i}]\tfile:file{fileNo}\tlogNo:{i}\tmark:{get_random_mark()}\tmsg:{faker.sentence()}') + if args.interval > 0: + time.sleep(args.interval / 1000) + +def delimiter(args, logger, faker): + custom_args = args.custom + quote = custom_args.get('quote', '') + delimiter = custom_args.get('delimiter', ' ') + fileNo = random.randint(1, 1000) + for i in range(args.count): + logParts = [ + f'{quote}{get_random_mark()}{quote}', + f'{quote}file{fileNo}{quote}', + f'{quote}logNo:{i}{quote}', + f'{quote}{faker.ipv4()}{quote}', + f'{quote}{datetime.now().strftime("%Y-%m-%d %H:%M:%S.%f")}{quote}', + f'{quote}{faker.http_method()}{quote}', + f'{quote}{faker.uri_path()}{quote}', + f'{quote}HTTP/2.0{quote}', + f'{quote}{faker.http_status_code()}{quote}', + f'{quote}{random.randint(1, 10000)}{quote}', + f'{quote}{faker.user_agent()}{quote}' + ] + log = delimiter.join(logParts) + logger.info(log) + if args.interval > 0: + time.sleep(args.interval / 1000) + +def delimiterMultiline(args, logger, faker): + custom_args = args.custom + quote = custom_args.get('quote', '') + delimiter = 
custom_args.get('delimiter', ' ') + fileNo = random.randint(1, 1000) + for i in range(args.count): + logParts = [ + f'{quote}{get_random_mark()}{quote}', + f'{quote}fi\nle{fileNo}{quote}', + f'{quote}logNo\n:{i}{quote}', + f'{quote}{faker.ipv4()}{quote}', + f'{quote}{datetime.now().strftime("%Y-%m-%d %H:%M:%S.%f")}{quote}', + f'{quote}{faker.http_method()}{quote}', + f'{quote}{faker.uri_path()}{quote}', + f'{quote}HT\nTP/2.0{quote}', + f'{quote}{faker.http_status_code()}{quote}', + f'{quote}{random.randint(1, 10000)}{quote}', + f'{quote}{faker.user_agent()}{quote}' + ] + log = delimiter.join(logParts) + logger.info(log) + if args.interval > 0: + time.sleep(args.interval / 1000) + +def json(args, logger, faker): + fileNo = random.randint(1, 1000) + for i in range(args.count): + logger.info(f'{{"mark":"{get_random_mark()}", "file":"file{fileNo}", "logNo":{i}, "time":"{datetime.now().strftime("%Y-%m-%d %H:%M:%S.%f")}", "ip": "{faker.ipv4()}", "method": "{faker.http_method()}", "userAgent": "{faker.user_agent()}", "size": {random.randint(1, 10000)}}}') + if args.interval > 0: + time.sleep(args.interval / 1000) + +def jsonMultiline(args, logger, faker): + fileNo = random.randint(1, 1000) + for i in range(args.count): + logParts = [ + f'"mark":"{get_random_mark()}"', + f'"file":"file{fileNo}"', + f'"logNo":{i}', + f'"time":"{datetime.now().strftime("%Y-%m-%d %H:%M:%S.%f")}"', + f'"ip": "{faker.ipv4()}"', + f'"method": "{faker.http_method()}"', + f'"userAgent": "{faker.user_agent()}"', + f'"size": {random.randint(1, 10000)}' + ] + log = '{' + ',\n'.join(logParts) + '}' + logger.info(log) + if args.interval > 0: + time.sleep(args.interval / 1000) + +def regex(args, logger, faker): + fileNo = random.randint(1, 1000) + for i in range(args.count): + logger.info(f'{get_random_mark()} file{fileNo}:{i} {faker.ipv4()} - [{datetime.now().strftime("%Y-%m-%d %H:%M:%S.%f")}] "{faker.http_method()} {faker.uri_path()} HTTP/2.0" {faker.http_status_code()} {random.randint(1, 10000)} 
"{faker.user_agent()}" "{faker.sentence()}"') + if args.interval > 0: + time.sleep(args.interval / 1000) + +def regexGBK(args, logger, faker): + fileNo = random.randint(1, 1000) + for i in range(args.count): + log = f'{get_random_mark()} file{fileNo}:{i} {faker.ipv4()} - [{datetime.now().strftime("%Y-%m-%d %H:%M:%S.%f")}] "{faker.http_method()} {faker.uri_path()} HTTP/2.0" {faker.http_status_code()} {random.randint(1, 10000)} "{faker.user_agent()}" "{faker.sentence()}"' + logger.info(str(log.encode('gbk'))) + if args.interval > 0: + time.sleep(args.interval / 1000) + +def regexMultiline(args, logger, faker): + fileNo = random.randint(1, 1000) + for i in range(args.count): + multilineLog = '\n'.join(faker.sentences(nb=random.randint(1, 5))) + logger.info(f'{get_random_mark()} file{fileNo}:{i} [{datetime.now().strftime("%Y-%m-%d %H:%M:%S.%f")}] [{get_random_level()}] java.lang.Exception: {multilineLog}') + + if args.interval > 0: + time.sleep(args.interval / 1000) + +def get_random_level(): + return random.choice(['DEBUG', 'INFO', 'WARNING', 'ERROR']) + +def get_random_mark(): + return random.choice(['-', 'F']) + +def parse_custom_arg_to_dict(custom_arg): + return jsonlib.loads(custom_arg) + +def main(): + parser = argparse.ArgumentParser(description='Log Generator Arg Parser') + parser.add_argument('--mode', type=str, default='regex', help='Log Type') + parser.add_argument('--path', type=str, default='default.log', help='Log Path') + parser.add_argument('--count', type=int, default=100, help='Log Count') + parser.add_argument('--interval', type=int, default=1, help='Log Interval (ms), < 0 means no interval') + parser.add_argument('--custom', type=parse_custom_arg_to_dict, help='Custom Args, in the format of json') + + args = parser.parse_args() + + logger = logging.getLogger('log_generator') + logger.setLevel(logging.INFO) + # 快速轮转来模拟比较极端的情况 + handler = TimedRotatingFileHandler(args.path, when="s", interval=5, backupCount=3) + formatter = 
logging.Formatter('%(message)s') + handler.setFormatter(formatter) + handler.flush = lambda: handler.stream.flush() + logger.addHandler(handler) + + # 随机生成器 + faker = Faker() + faker.add_provider(internet) + faker.add_provider(user_agent) + faker.add_provider(lorem) + faker.add_provider(misc) + + # 生成数据 + if args.mode == 'apsara': + apsara(args, logger, faker) + elif args.mode == 'delimiter': + delimiter(args, logger, faker) + elif args.mode == 'delimiterMultiline': + delimiterMultiline(args, logger, faker) + elif args.mode == 'json': + json(args, logger, faker) + elif args.mode == 'jsonMultiline': + jsonMultiline(args, logger, faker) + elif args.mode == 'regex': + regex(args, logger, faker) + elif args.mode == 'regexGBK': + regexGBK(args, logger, faker) + elif args.mode == 'regexMultiline': + regexMultiline(args, logger, faker) + + +if __name__ == '__main__': + main() diff --git a/test/engine/trigger/log/remote_file_benchmark.py b/test/engine/trigger/log/remote_file_benchmark.py new file mode 100644 index 0000000000..b9f53e00da --- /dev/null +++ b/test/engine/trigger/log/remote_file_benchmark.py @@ -0,0 +1,70 @@ +import argparse +import json +import logging +import math +import random +import time + +from logging.handlers import TimedRotatingFileHandler +from datetime import datetime +from faker import Faker +from faker.providers import internet, user_agent, lorem, misc + +BATCH_SIZE = 100 + +def nginx(args, logger, faker): + startTime = time.perf_counter() + exampleLog = '' + for _ in range(BATCH_SIZE): + exampleLog += f'{faker.ipv4()} - - [{datetime.now().strftime("%d/%b/%Y:%H:%M:%S %z")}] "{faker.http_method()} {faker.url()} HTTP/1.1" {faker.http_status_code()} {random.randint(1, 10000)} "{faker.url()}" "{faker.user_agent()}\n"' + randomLogCost = (time.perf_counter() - startTime) / BATCH_SIZE + writeTimePerSecond = math.floor(args.rate * 1024 * 1024 / (len(exampleLog.encode('utf-8')))) + sleepInterval = 1 / writeTimePerSecond - randomLogCost + + startTime = 
datetime.now() + while True: + now = datetime.now() + fakeLog = f'{faker.ipv4()} - - [{now.strftime("%d/%b/%Y:%H:%M:%S %z")}] "{faker.http_method()} {faker.url()} HTTP/1.1" {faker.http_status_code()} {random.randint(1, 10000)} "{faker.url()}" "{faker.user_agent()}"\n' * BATCH_SIZE + logger.info(fakeLog[:-1]) + if sleepInterval > 0: + start = time.perf_counter() + while (time.perf_counter() - start) < sleepInterval: + pass + if (now - startTime).seconds > args.duration * 60: + break + +def parse_custom_arg_to_dict(custom_arg): + return json.loads(custom_arg) + +def main(): + parser = argparse.ArgumentParser(description='Log Generator Arg Parser') + parser.add_argument('--mode', type=str, default='nginx', help='Log Type') + parser.add_argument('--path', type=str, default='default.log', help='Log Path') + parser.add_argument('--rate', type=int, default=10, help='Log Generate Rate (MB/s)') + parser.add_argument('--duration', type=int, default=60, help='Log Generate Duration (min)') + parser.add_argument('--custom', nargs='*', type=parse_custom_arg_to_dict, help='Custom Args, in the format of key=value') + + args = parser.parse_args() + + logger = logging.getLogger('log_generator') + logger.setLevel(logging.INFO) + # 快速轮转来模拟比较极端的情况 + handler = TimedRotatingFileHandler(args.path, when="s", interval=70, backupCount=3) + formatter = logging.Formatter('%(message)s') + handler.setFormatter(formatter) + logger.addHandler(handler) + + # 随机生成器 + faker = Faker() + faker.add_provider(internet) + faker.add_provider(user_agent) + faker.add_provider(lorem) + faker.add_provider(misc) + + # 生成数据 + if args.mode == 'nginx': + nginx(args, logger, faker) + + +if __name__ == '__main__': + main() diff --git a/test/engine/trigger/trigger.go b/test/engine/trigger/trigger.go deleted file mode 100644 index 63653c5cb6..0000000000 --- a/test/engine/trigger/trigger.go +++ /dev/null @@ -1,96 +0,0 @@ -// Copyright 2024 iLogtail Authors -// -// Licensed under the Apache License, Version 2.0 (the 
"License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -package trigger - -import ( - "context" - "strings" - "text/template" - "time" - - "github.com/alibaba/ilogtail/test/config" - "github.com/alibaba/ilogtail/test/engine/setup" -) - -const triggerTemplate = "cd {{.WorkDir}} && TOTAL_LOG={{.TotalLog}} INTERVAL={{.Interval}} FILENAME={{.Filename}} GENERATED_LOG_DIR={{.GeneratedLogDir}} {{.Custom}} {{.Command}}" - -func RegexSingle(ctx context.Context, totalLog int, path string, interval int) (context.Context, error) { - return generate(ctx, totalLog, path, interval, "TestGenerateRegexLogSingle") -} - -func RegexSingleGBK(ctx context.Context, totalLog int, path string, interval int) (context.Context, error) { - return generate(ctx, totalLog, path, interval, "TestGenerateRegexLogSingleGBK") -} - -func RegexMultiline(ctx context.Context, totalLog int, path string, interval int) (context.Context, error) { - return generate(ctx, totalLog, path, interval, "TestGenerateRegexLogMultiline") -} - -func JSONSingle(ctx context.Context, totalLog int, path string, interval int) (context.Context, error) { - return generate(ctx, totalLog, path, interval, "TestGenerateJSONSingle") -} - -func JSONMultiline(ctx context.Context, totalLog int, path string, interval int) (context.Context, error) { - return generate(ctx, totalLog, path, interval, "TestGenerateJSONMultiline") -} - -func Apsara(ctx context.Context, totalLog int, path string, interval int) (context.Context, error) { - return generate(ctx, totalLog, path, interval, 
"TestGenerateApsara") -} - -func DelimiterSingle(ctx context.Context, totalLog int, path string, interval int, delimiter, quote string) (context.Context, error) { - return generate(ctx, totalLog, path, interval, "TestGenerateDelimiterSingle", "Delimiter", delimiter, "Quote", quote) -} - -func DelimiterMultiline(ctx context.Context, totalLog int, path string, interval int, delimiter, quote string) (context.Context, error) { - return generate(ctx, totalLog, path, interval, "TestGenerateDelimiterMultiline", "Delimiter", delimiter, "Quote", quote) -} - -func generate(ctx context.Context, totalLog int, path string, interval int, commandName string, customKV ...string) (context.Context, error) { - time.Sleep(3 * time.Second) - command := getRunTriggerCommand(commandName) - var triggerCommand strings.Builder - template := template.Must(template.New("trigger").Parse(triggerTemplate)) - splittedPath := strings.Split(path, "/") - dir := strings.Join(splittedPath[:len(splittedPath)-1], "/") - filename := splittedPath[len(splittedPath)-1] - customString := strings.Builder{} - for i := 0; i < len(customKV); i++ { - customString.WriteString(customKV[i]) - customString.WriteString("=") - customString.WriteString(customKV[i+1]) - customString.WriteString(" ") - i++ - } - if err := template.Execute(&triggerCommand, map[string]interface{}{ - "WorkDir": config.TestConfig.WorkDir, - "TotalLog": totalLog, - "Interval": interval, - "GeneratedLogDir": dir, - "Filename": filename, - "Custom": customString.String(), - "Command": command, - }); err != nil { - return ctx, err - } - if _, err := setup.Env.ExecOnSource(ctx, triggerCommand.String()); err != nil { - return ctx, err - } - return ctx, nil -} - -func BeginTrigger(ctx context.Context) (context.Context, error) { - startTime := time.Now().Unix() - return context.WithValue(ctx, config.StartTimeContextKey, int32(startTime)), nil -} diff --git a/test/requirements.txt b/test/requirements.txt new file mode 100644 index 
0000000000..ea45cd03b7 --- /dev/null +++ b/test/requirements.txt @@ -0,0 +1 @@ +Faker \ No newline at end of file From 0fb74e6473089bf7ba1afbdb14db7eecb019da18 Mon Sep 17 00:00:00 2001 From: Bingchang Chen Date: Tue, 12 Nov 2024 23:35:18 +0800 Subject: [PATCH 03/10] test: network chaos and restart agent (#1877) * test: network chaos and restart agent * fix lint --- test/config/config.go | 11 +- test/config/context.go | 1 + test/engine/cleanup/cache.go | 28 ---- test/engine/cleanup/chaos.go | 74 ++++++++++ test/engine/cleanup/helper.go | 19 ++- test/engine/control/agent.go | 48 +++++++ test/engine/setup/chaos/network.go | 156 +++++++++++++++++++++ test/engine/setup/controller/kubernetes.go | 18 +-- test/engine/setup/subscriber/sls.go | 42 +++++- test/engine/steps.go | 8 ++ test/engine/trigger/log/file.go | 1 - test/requirements.txt | 2 +- 12 files changed, 355 insertions(+), 53 deletions(-) delete mode 100644 test/engine/cleanup/cache.go create mode 100644 test/engine/cleanup/chaos.go create mode 100644 test/engine/control/agent.go create mode 100644 test/engine/setup/chaos/network.go diff --git a/test/config/config.go b/test/config/config.go index 0ddd20c517..ccbb41d7d1 100644 --- a/test/config/config.go +++ b/test/config/config.go @@ -17,6 +17,7 @@ import ( "os" "path/filepath" "strconv" + "strings" "time" "github.com/alibaba/ilogtail/pkg/logger" @@ -46,7 +47,6 @@ type Config struct { AccessKeySecret string `mapstructure:"access_key_secret" yaml:"access_key_secret"` Endpoint string `mapstructure:"endpoint" yaml:"endpoint"` Aliuid string `mapstructure:"aliuid" yaml:"aliuid"` - QueryEndpoint string `mapstructure:"query_endpoint" yaml:"query_endpoint"` Region string `mapstructure:"region" yaml:"region"` RetryTimeout time.Duration `mapstructure:"retry_timeout" yaml:"retry_timeout"` } @@ -94,7 +94,6 @@ func ParseConfig() { TestConfig.AccessKeySecret = os.Getenv("ACCESS_KEY_SECRET") TestConfig.Endpoint = os.Getenv("ENDPOINT") TestConfig.Aliuid = os.Getenv("ALIUID") - 
TestConfig.QueryEndpoint = os.Getenv("QUERY_ENDPOINT") TestConfig.Region = os.Getenv("REGION") timeout, err := strconv.ParseInt(os.Getenv("RETRY_TIMEOUT"), 10, 64) if err != nil { @@ -102,3 +101,11 @@ func ParseConfig() { } TestConfig.RetryTimeout = time.Duration(timeout) * time.Second } + +func GetQueryEndpoint() string { + idx := strings.Index(TestConfig.Endpoint, "-intranet") + if idx == -1 { + return TestConfig.Endpoint + } + return TestConfig.Endpoint[:idx] + TestConfig.Endpoint[idx+9:] +} diff --git a/test/config/context.go b/test/config/context.go index d6f4c0d57a..6181498732 100644 --- a/test/config/context.go +++ b/test/config/context.go @@ -23,4 +23,5 @@ const ( CurrentWorkingDeploymentKey ContextKey = "currentWorkingDeployment" QueryKey ContextKey = "query" AgentPIDKey ContextKey = "agentPID" + EndpointIPKey ContextKey = "endpointIP" ) diff --git a/test/engine/cleanup/cache.go b/test/engine/cleanup/cache.go deleted file mode 100644 index 18d73319bb..0000000000 --- a/test/engine/cleanup/cache.go +++ /dev/null @@ -1,28 +0,0 @@ -// Copyright 2024 iLogtail Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-package cleanup - -import ( - "context" - - "github.com/alibaba/ilogtail/test/engine/setup" -) - -func GoTestCache(ctx context.Context) (context.Context, error) { - command := "/usr/local/go/bin/go clean -testcache" - if _, err := setup.Env.ExecOnSource(ctx, command); err != nil { - return ctx, err - } - return ctx, nil -} diff --git a/test/engine/cleanup/chaos.go b/test/engine/cleanup/chaos.go new file mode 100644 index 0000000000..f9e171f793 --- /dev/null +++ b/test/engine/cleanup/chaos.go @@ -0,0 +1,74 @@ +// Copyright 2024 iLogtail Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+package cleanup + +import ( + "context" + "encoding/json" + "fmt" + "os" + "path/filepath" + + "github.com/alibaba/ilogtail/test/engine/setup" +) + +type ChaosStatus struct { + Code int `json:"code"` + Success bool `json:"success"` + Result []map[string]string `json:"result"` +} + +func DestoryAllChaos(ctx context.Context) (context.Context, error) { + switch setup.Env.GetType() { + case "host": + command := "/opt/chaosblade/blade status --type create --status Success" + response, err := setup.Env.ExecOnLogtail(command) + if err != nil { + return ctx, err + } + var status ChaosStatus + if err = json.Unmarshal([]byte(response), &status); err != nil { + return ctx, err + } + for _, result := range status.Result { + command = "/opt/chaosblade/blade destroy " + result["Uid"] + if _, err := setup.Env.ExecOnLogtail(command); err != nil { + fmt.Println("Destroy chaos failed: ", err) + } + } + case "daemonset", "deployment": + k8sEnv := setup.Env.(*setup.K8sEnv) + chaosDir := filepath.Join("test_cases", "chaos") + err := filepath.Walk(chaosDir, func(path string, info os.FileInfo, err error) error { + if err != nil { + return err + } + if info.IsDir() { + return nil + } + if filepath.Ext(path) != ".yaml" { + return nil + } + return k8sEnv.Delete(path[len("test_cases/"):]) + }) + if err != nil { + return ctx, err + } + // delete chaosDir + if err = os.RemoveAll(chaosDir); err != nil { + return ctx, err + } + } + return ctx, nil +} diff --git a/test/engine/cleanup/helper.go b/test/engine/cleanup/helper.go index c223800104..cae8862f14 100644 --- a/test/engine/cleanup/helper.go +++ b/test/engine/cleanup/helper.go @@ -15,6 +15,7 @@ package cleanup import ( "context" + "fmt" "os" "os/signal" "syscall" @@ -44,10 +45,20 @@ func All() { return } ctx := context.TODO() - _, _ = control.RemoveAllLocalConfig(ctx) - _, _ = AllGeneratedLog(ctx) - _, _ = GoTestCache(ctx) - _, _ = DeleteContainers(ctx) + red := "\033[31m" + reset := "\033[0m" + if _, err := 
control.RemoveAllLocalConfig(ctx); err != nil { + fmt.Println(red + err.Error() + reset) + } + if _, err := AllGeneratedLog(ctx); err != nil { + fmt.Println(red + err.Error() + reset) + } + if _, err := DestoryAllChaos(ctx); err != nil { + fmt.Println(red + err.Error() + reset) + } + if _, err := DeleteContainers(ctx); err != nil { + fmt.Println(red + err.Error() + reset) + } if subscriber.TestSubscriber != nil { _ = subscriber.TestSubscriber.Stop() } diff --git a/test/engine/control/agent.go b/test/engine/control/agent.go new file mode 100644 index 0000000000..bc800b39ed --- /dev/null +++ b/test/engine/control/agent.go @@ -0,0 +1,48 @@ +// Copyright 2024 iLogtail Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+package control + +import ( + "context" + "fmt" + "strings" + + "github.com/alibaba/ilogtail/test/config" + "github.com/alibaba/ilogtail/test/engine/setup" +) + +func RestartAgent(ctx context.Context) (context.Context, error) { + if _, err := setup.Env.ExecOnLogtail("/etc/init.d/loongcollectord restart"); err != nil { + return ctx, err + } + return setup.SetAgentPID(ctx) +} + +func ForceRestartAgent(ctx context.Context) (context.Context, error) { + currentPID := ctx.Value(config.AgentPIDKey) + if currentPID != nil { + currentPIDs := strings.Split(strings.TrimSpace(currentPID.(string)), "\n") + for _, pid := range currentPIDs { + if _, err := setup.Env.ExecOnLogtail("kill -9 " + pid); err != nil { + fmt.Println("Force kill agent pid failed: ", err) + } + } + } else { + fmt.Println("No agent pid found, skip force restart") + } + if _, err := setup.Env.ExecOnLogtail("/etc/init.d/loongcollectord restart"); err != nil { + return ctx, err + } + return setup.SetAgentPID(ctx) +} diff --git a/test/engine/setup/chaos/network.go b/test/engine/setup/chaos/network.go new file mode 100644 index 0000000000..09d94b21bd --- /dev/null +++ b/test/engine/setup/chaos/network.go @@ -0,0 +1,156 @@ +// Copyright 2024 iLogtail Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+package chaos + +import ( + "context" + "os" + "path/filepath" + "strconv" + "text/template" + + "github.com/alibaba/ilogtail/test/engine/setup" +) + +const ( + // networkDelayCRD + networkDelayCRDTmpl = ` +apiVersion: chaosblade.io/v1alpha1 +kind: ChaosBlade +metadata: + name: delay-pod-network +spec: + experiments: + - scope: pod + target: network + action: delay + desc: "delay pod network" + matchers: + - name: labels + value: ["{{.PodLabel}}"] + - name: namespace + value: ["kube-system"] + - name: interface + value: ["eth0"] + - name: destination-ip + value: ["{{.Percent}}"] + - name: time + value: ["{{.Time}}"] +` + + // networkLossCRD + networkLossCRDTmpl = ` +apiVersion: chaosblade.io/v1alpha1 +kind: ChaosBlade +metadata: + name: loss-pod-network +spec: + experiments: + - scope: pod + target: network + action: loss + desc: "loss pod network" + matchers: + - name: labels + value: ["{{.PodLabel}}"] + - name: namespace + value: ["kube-system"] + - name: interface + value: ["eth0"] + - name: percent + value: ["{{.Percent}}"] + - name: exclude-port + value: ["22"] + - name: destination-ip + value: ["{{.Ip}}"] +` +) + +func NetworkDelay(ctx context.Context, time int, ip string) (context.Context, error) { + switch setup.Env.GetType() { + case "host": + command := "/opt/chaosblade/blade create network delay --time " + strconv.FormatInt(int64(time), 10) + " --exclude-port 22 --interface eth0 --destination-ip " + ip + _, err := setup.Env.ExecOnLogtail(command) + if err != nil { + return ctx, err + } + case "daemonset", "deployment": + dir := filepath.Join("test_cases", "chaos") + filename := "loss-pod-network.yaml" + _ = os.Mkdir(dir, 0750) + file, err := os.OpenFile(filepath.Join(dir, filename), os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644) //nolint:gosec + if err != nil { + return ctx, err + } + defer file.Close() //nolint:gosec + + networkDelayCRD, _ := template.New("networkDelay").Parse(networkDelayCRDTmpl) + if err = networkDelayCRD.Execute(file, map[string]string{ 
+ "PodLabel": getLoongCollectorPodLabel(), + "Time": strconv.FormatInt(int64(time), 10), + "Ip": ip, + }); err != nil { + return ctx, err + } + k8sEnv := setup.Env.(*setup.K8sEnv) + if err := k8sEnv.Apply(filepath.Join("chaos", filename)); err != nil { + return ctx, err + } + } + return ctx, nil +} + +func NetworkLoss(ctx context.Context, percentage int, ip string) (context.Context, error) { + switch setup.Env.GetType() { + case "host": + command := "/opt/chaosblade/blade create network loss --percent " + strconv.FormatInt(int64(percentage), 10) + " --exclude-port 22 --interface eth0 --destination-ip " + ip + _, err := setup.Env.ExecOnLogtail(command) + if err != nil { + return ctx, err + } + case "daemonset", "deployment": + dir := filepath.Join("test_cases", "chaos") + filename := "loss-pod-network.yaml" + _ = os.Mkdir(dir, 0750) + file, err := os.OpenFile(filepath.Join(dir, filename), os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644) //nolint:gosec + if err != nil { + return ctx, err + } + defer file.Close() //nolint:gosec + + networkLossCRD, _ := template.New("networkLoss").Parse(networkLossCRDTmpl) + if err = networkLossCRD.Execute(file, map[string]string{ + "PodLabel": getLoongCollectorPodLabel(), + "Percent": strconv.FormatInt(int64(percentage), 10), + "Ip": ip, + }); err != nil { + return ctx, err + } + k8sEnv := setup.Env.(*setup.K8sEnv) + if err := k8sEnv.Apply(filepath.Join("chaos", filename)); err != nil { + return ctx, err + } + } + return ctx, nil +} + +func getLoongCollectorPodLabel() string { + var PodLabel string + if setup.Env.GetType() == "daemonset" { + PodLabel = "k8s-app=logtail-ds" + } else if setup.Env.GetType() == "deployment" { + PodLabel = "k8s-app=loongcollector-cluster" + } + return PodLabel +} diff --git a/test/engine/setup/controller/kubernetes.go b/test/engine/setup/controller/kubernetes.go index 39efb316a3..0bd0a0f62e 100644 --- a/test/engine/setup/controller/kubernetes.go +++ b/test/engine/setup/controller/kubernetes.go @@ -230,18 +230,18 
@@ func (c *DynamicController) Apply(filePath string) error { } // Apply the object to the Kubernetes cluster - namespace := obj.GetNamespace() - if namespace == "" { - namespace = "default" // Use default namespace if not specified - } - resourceInterface := c.dynamicClient.Resource(mapping.Resource).Namespace(namespace) - if _, err := resourceInterface.Get(context.TODO(), obj.GetName(), metav1.GetOptions{}); err != nil { + resourceInterface := c.dynamicClient.Resource(mapping.Resource) + if oldObj, err := resourceInterface.Get(context.TODO(), obj.GetName(), metav1.GetOptions{}); err != nil { + if !meta.IsNoMatchError(err) { + return err + } // Object does not exist, create it if _, err := resourceInterface.Create(context.TODO(), obj, metav1.CreateOptions{}); err != nil { return err } } else { // Object exists, update it + obj.SetResourceVersion(oldObj.GetResourceVersion()) if _, err := resourceInterface.Update(context.TODO(), obj, metav1.UpdateOptions{}); err != nil { return err } @@ -257,11 +257,7 @@ func (c *DynamicController) Delete(filePath string) error { } // Delete the object from the Kubernetes cluster - namespace := obj.GetNamespace() - if namespace == "" { - namespace = "default" // Use default namespace if not specified - } - resourceInterface := c.dynamicClient.Resource(mapping.Resource).Namespace(namespace) + resourceInterface := c.dynamicClient.Resource(mapping.Resource) if err := resourceInterface.Delete(context.TODO(), obj.GetName(), metav1.DeleteOptions{}); err != nil { return err } diff --git a/test/engine/setup/subscriber/sls.go b/test/engine/setup/subscriber/sls.go index 9d05c56cc6..b817bee8fe 100644 --- a/test/engine/setup/subscriber/sls.go +++ b/test/engine/setup/subscriber/sls.go @@ -29,6 +29,11 @@ flushers: type SLSSubscriber struct { client *sls.Client TelemetryType string + Aliuid string + Region string + Endpoint string + Project string + Logstore string } func (s *SLSSubscriber) Name() string { @@ -65,11 +70,11 @@ func (s 
*SLSSubscriber) FlusherConfig() string { tpl := template.Must(template.New("slsFlusherConfig").Parse(SLSFlusherConfigTemplate)) var builder strings.Builder _ = tpl.Execute(&builder, map[string]interface{}{ - "Aliuid": config.TestConfig.Aliuid, - "Region": config.TestConfig.Region, - "Endpoint": config.TestConfig.Endpoint, - "Project": config.TestConfig.Project, - "Logstore": config.TestConfig.GetLogstore(s.TelemetryType), + "Aliuid": s.Aliuid, + "Region": s.Region, + "Endpoint": s.Endpoint, + "Project": s.Project, + "Logstore": s.Logstore, "TelemetryType": s.TelemetryType, }) config := builder.String() @@ -135,9 +140,34 @@ func init() { } fmt.Println("create sls subscriber with telemetry type", telemetryType) l := &SLSSubscriber{ - client: createSLSClient(config.TestConfig.AccessKeyID, config.TestConfig.AccessKeySecret, config.TestConfig.QueryEndpoint), + client: createSLSClient(config.TestConfig.AccessKeyID, config.TestConfig.AccessKeySecret, config.GetQueryEndpoint()), TelemetryType: telemetryType, } + if v, ok := spec["aliuid"]; ok { + l.Aliuid = v.(string) + } else { + l.Aliuid = config.TestConfig.Aliuid + } + if v, ok := spec["region"]; ok { + l.Region = v.(string) + } else { + l.Region = config.TestConfig.Region + } + if v, ok := spec["endpoint"]; ok { + l.Endpoint = v.(string) + } else { + l.Endpoint = config.TestConfig.Endpoint + } + if v, ok := spec["project"]; ok { + l.Project = v.(string) + } else { + l.Project = config.TestConfig.Project + } + if v, ok := spec["logstore"]; ok { + l.Logstore = v.(string) + } else { + l.Logstore = config.TestConfig.GetLogstore(telemetryType) + } return l, nil }) doc.Register("subscriber", slsName, new(SLSSubscriber)) diff --git a/test/engine/steps.go b/test/engine/steps.go index 336a5ff8ac..dbbb2524dc 100644 --- a/test/engine/steps.go +++ b/test/engine/steps.go @@ -10,6 +10,7 @@ import ( "github.com/alibaba/ilogtail/test/engine/cleanup" "github.com/alibaba/ilogtail/test/engine/control" 
"github.com/alibaba/ilogtail/test/engine/setup" + "github.com/alibaba/ilogtail/test/engine/setup/chaos" "github.com/alibaba/ilogtail/test/engine/setup/monitor" "github.com/alibaba/ilogtail/test/engine/setup/subscriber" "github.com/alibaba/ilogtail/test/engine/trigger" @@ -31,6 +32,11 @@ func ScenarioInitializer(ctx *godog.ScenarioContext) { ctx.Given(`^subcribe data from \{(\S+)\} with config`, subscriber.InitSubscriber) ctx.Given(`^mkdir \{(.*)\}`, setup.Mkdir) ctx.Given(`^docker-compose boot type \{(\S+)\}$`, setup.SetDockerComposeBootType) + + // chaos + ctx.Given(`^network delay package \{(\d+)\}ms for ip \{(.*)\}`, chaos.NetworkDelay) + ctx.Given(`^network lost package \{(\d+)\}% for ip \{(.*)\}`, chaos.NetworkLoss) + ctx.Given(`^clean all chaos$`, cleanup.DestoryAllChaos) // ------------------------------------------ // When @@ -42,6 +48,8 @@ func ScenarioInitializer(ctx *godog.ScenarioContext) { ctx.When(`^query through \{(.*)\}`, control.SetQuery) ctx.When(`^apply yaml \{(.*)\} to k8s`, control.ApplyYaml) ctx.When(`^delete yaml \{(.*)\} from k8s`, control.DeleteYaml) + ctx.When(`^restart agent`, control.RestartAgent) + ctx.When(`^force restart agent`, control.ForceRestartAgent) // generate ctx.When(`^begin trigger`, trigger.BeginTrigger) diff --git a/test/engine/trigger/log/file.go b/test/engine/trigger/log/file.go index ddea428021..0319da3511 100644 --- a/test/engine/trigger/log/file.go +++ b/test/engine/trigger/log/file.go @@ -99,7 +99,6 @@ func generateBenchmark(ctx context.Context, mode, path string, rate, duration in } func wrapperCustomArgs(customArgs string) string { - fmt.Println(customArgs) customArgs = strings.ReplaceAll(customArgs, "\\", "\\\\") return "\"" + strings.ReplaceAll(customArgs, "\"", "\\\"") + "\"" } diff --git a/test/requirements.txt b/test/requirements.txt index ea45cd03b7..02ac307220 100644 --- a/test/requirements.txt +++ b/test/requirements.txt @@ -1 +1 @@ -Faker \ No newline at end of file +Faker==30.8.2 \ No newline at end of 
file From 0c301d25c6bec67699d2d79aaa88bb332eaa78de Mon Sep 17 00:00:00 2001 From: quzard <1191890118@qq.com> Date: Wed, 13 Nov 2024 09:52:47 +0800 Subject: [PATCH 04/10] Changed the endpoint source type from LOCAL to REMOTE in the Init method of FlusherSLS (#1873) --- core/application/Application.cpp | 3 ++- core/plugin/flusher/sls/FlusherSLS.cpp | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/core/application/Application.cpp b/core/application/Application.cpp index 11fe230a67..70d095c4a8 100644 --- a/core/application/Application.cpp +++ b/core/application/Application.cpp @@ -114,6 +114,7 @@ void Application::Init() { // Initialize basic information: IP, hostname, etc. LogFileProfiler::GetInstance(); #ifdef __ENTERPRISE__ + EnterpriseConfigProvider::GetInstance()->Init("enterprise"); EnterpriseConfigProvider::GetInstance()->LoadRegionConfig(); if (GlobalConf::Instance()->mStartWorkerStatus == "Crash") { LogtailAlarm::GetInstance()->SendAlarm(LOGTAIL_CRASH_ALARM, "Logtail Restart"); @@ -222,7 +223,7 @@ void Application::Start() { // GCOVR_EXCL_START } #ifdef __ENTERPRISE__ - EnterpriseConfigProvider::GetInstance()->Init("enterprise"); + EnterpriseConfigProvider::GetInstance()->Start(); LegacyConfigProvider::GetInstance()->Init("legacy"); #else InitRemoteConfigProviders(); diff --git a/core/plugin/flusher/sls/FlusherSLS.cpp b/core/plugin/flusher/sls/FlusherSLS.cpp index 524b4550b9..b6cf23fdf4 100644 --- a/core/plugin/flusher/sls/FlusherSLS.cpp +++ b/core/plugin/flusher/sls/FlusherSLS.cpp @@ -379,7 +379,7 @@ bool FlusherSLS::Init(const Json::Value& config, Json::Value& optionalGoPipeline SLSClientManager::GetInstance()->AddEndpointEntry(mRegion, StandardizeEndpoint(mEndpoint, mEndpoint), false, - SLSClientManager::EndpointSourceType::LOCAL); + SLSClientManager::EndpointSourceType::REMOTE); } } #ifdef __ENTERPRISE__ From 9005b10aa10bfeda76520e670c1d26706373309d Mon Sep 17 00:00:00 2001 From: linrunqi08 <90741255+linrunqi08@users.noreply.github.com> 
Date: Wed, 13 Nov 2024 10:15:50 +0800 Subject: [PATCH 05/10] Fix the issue of missing container information caused by the event sequence when docker compose is repeatedly up. (#1875) --- pkg/helper/docker_center.go | 55 +++++++++++++++++++++---------------- 1 file changed, 32 insertions(+), 23 deletions(-) diff --git a/pkg/helper/docker_center.go b/pkg/helper/docker_center.go index 193303e736..c4b6feef5e 100644 --- a/pkg/helper/docker_center.go +++ b/pkg/helper/docker_center.go @@ -16,6 +16,7 @@ package helper import ( "context" + "errors" "hash/fnv" "path" "regexp" @@ -1022,6 +1023,35 @@ func (dc *DockerCenter) updateContainer(id string, container *DockerInfoDetail) dc.refreshLastUpdateMapTime() } +func (dc *DockerCenter) inspectOneContainer(containerID string) (types.ContainerJSON, error) { + var err error + var containerDetail types.ContainerJSON + for idx := 0; idx < 3; idx++ { + if containerDetail, err = dc.client.ContainerInspect(context.Background(), containerID); err == nil { + break + } + time.Sleep(time.Second * 5) + } + if err != nil { + dc.setLastError(err, "inspect container error "+containerID) + return types.ContainerJSON{}, err + } + if !ContainerProcessAlive(containerDetail.State.Pid) { + containerDetail.State.Status = ContainerStatusExited + finishedAt := containerDetail.State.FinishedAt + finishedAtTime, _ := time.Parse(time.RFC3339, finishedAt) + now := time.Now() + duration := now.Sub(finishedAtTime) + if duration >= ContainerInfoDeletedTimeout { + errMsg := "inspect time out container " + containerID + err = errors.New(errMsg) + dc.setLastError(err, errMsg) + return types.ContainerJSON{}, err + } + } + return containerDetail, nil +} + func (dc *DockerCenter) fetchAll() error { dc.containerStateLock.Lock() defer dc.containerStateLock.Unlock() @@ -1035,26 +1065,9 @@ func (dc *DockerCenter) fetchAll() error { for _, container := range containers { var containerDetail types.ContainerJSON - for idx := 0; idx < 3; idx++ { - if containerDetail, err = 
dc.client.ContainerInspect(context.Background(), container.ID); err == nil { - break - } - time.Sleep(time.Second * 5) - } + containerDetail, err = dc.inspectOneContainer(container.ID) if err == nil { - if !ContainerProcessAlive(containerDetail.State.Pid) { - containerDetail.State.Status = ContainerStatusExited - finishedAt := containerDetail.State.FinishedAt - finishedAtTime, _ := time.Parse(time.RFC3339, finishedAt) - now := time.Now() - duration := now.Sub(finishedAtTime) - if duration >= ContainerInfoDeletedTimeout { - continue - } - } containerMap[container.ID] = dc.CreateInfoDetail(containerDetail, envConfigPrefix, false) - } else { - dc.setLastError(err, "inspect container error "+container.ID) } } dc.updateContainers(containerMap) @@ -1065,14 +1078,10 @@ func (dc *DockerCenter) fetchAll() error { func (dc *DockerCenter) fetchOne(containerID string, tryFindSandbox bool) error { dc.containerStateLock.Lock() defer dc.containerStateLock.Unlock() - containerDetail, err := dc.client.ContainerInspect(context.Background(), containerID) + containerDetail, err := dc.inspectOneContainer(containerID) if err != nil { - dc.setLastError(err, "inspect container error "+containerID) return err } - if containerDetail.State.Status == ContainerStatusRunning && !ContainerProcessAlive(containerDetail.State.Pid) { - containerDetail.State.Status = ContainerStatusExited - } // docker 场景下 // tryFindSandbox如果是false, 那么fetchOne的地方应该会调用两次,一次是sandbox的id,一次是业务容器的id // tryFindSandbox如果是true, 调用的地方只会有一个业务容器的id,然后依赖fetchOne内部把sandbox信息补全 From eaf69cbd16c5a035fe2ba1372592e8e37759fd6d Mon Sep 17 00:00:00 2001 From: Takuka0311 <1914426213@qq.com> Date: Wed, 13 Nov 2024 10:22:17 +0800 Subject: [PATCH 06/10] change metric struct (#1862) --- core/common/compression/Compressor.cpp | 4 +- core/common/compression/CompressorFactory.cpp | 3 +- .../ContainerDiscoveryOptions.cpp | 2 +- .../ContainerDiscoveryOptions.h | 2 +- core/ebpf/SelfMonitor.cpp | 2 +- core/ebpf/SelfMonitor.h | 2 +- 
core/ebpf/eBPFServer.cpp | 8 +- core/ebpf/eBPFServer.h | 4 +- core/file_server/FileServer.cpp | 4 +- core/monitor/LogtailMetric.cpp | 138 +++++++++++------- core/monitor/LogtailMetric.h | 38 ++++- core/monitor/MetricExportor.cpp | 52 +++++-- core/monitor/Monitor.cpp | 3 +- core/monitor/PluginMetricManager.cpp | 4 +- core/monitor/PluginMetricManager.h | 14 +- .../monitor/metric_constants/AgentMetrics.cpp | 18 +-- .../metric_constants/ComponentMetrics.cpp | 62 ++++---- .../MetricCommonConstants.cpp | 35 +++++ .../metric_constants/MetricCommonConstants.h | 36 +++++ .../metric_constants/MetricConstants.h | 73 +++++---- .../metric_constants/PipelineMetrics.cpp | 21 ++- .../metric_constants/PluginMetrics.cpp | 103 +++++++------ .../metric_constants/RunnerMetrics.cpp | 58 ++++---- core/pipeline/Pipeline.cpp | 5 +- core/pipeline/batch/Batcher.h | 4 +- core/pipeline/plugin/interface/Plugin.h | 2 +- core/pipeline/queue/QueueInterface.h | 2 +- core/pipeline/route/Router.cpp | 4 +- core/pipeline/serializer/Serializer.h | 4 +- core/plugin/input/InputContainerStdio.cpp | 7 +- core/plugin/input/InputFile.cpp | 4 +- core/plugin/input/InputFileSecurity.cpp | 2 +- core/plugin/input/InputFileSecurity.h | 2 +- core/plugin/input/InputNetworkObserver.cpp | 2 +- core/plugin/input/InputNetworkObserver.h | 2 +- core/plugin/input/InputNetworkSecurity.cpp | 2 +- core/plugin/input/InputNetworkSecurity.h | 2 +- core/plugin/input/InputProcessSecurity.cpp | 2 +- core/plugin/input/InputProcessSecurity.h | 2 +- core/prometheus/PromSelfMonitor.cpp | 2 +- core/prometheus/PrometheusInputRunner.cpp | 3 +- .../prometheus/schedulers/ScrapeScheduler.cpp | 2 +- .../schedulers/TargetSubscriberScheduler.cpp | 2 +- core/runner/FlusherRunner.cpp | 4 +- core/runner/ProcessorRunner.cpp | 6 +- core/runner/sink/http/HttpSink.cpp | 4 +- core/unittest/batch/BatcherUnittest.cpp | 2 +- .../compression/CompressorFactoryUnittest.cpp | 2 +- .../monitor/LogtailMetricUnittest.cpp | 14 +- 
.../monitor/PluginMetricManagerUnittest.cpp | 7 +- core/unittest/pipeline/PipelineUnittest.cpp | 8 +- .../prometheus/PromSelfMonitorUnittest.cpp | 4 +- .../queue/BoundedProcessQueueUnittest.cpp | 2 +- .../queue/CircularProcessQueueUnittest.cpp | 2 +- core/unittest/queue/SenderQueueUnittest.cpp | 2 +- core/unittest/route/RouterUnittest.cpp | 2 +- pkg/helper/k8smeta/k8s_meta_manager.go | 10 +- pkg/helper/self_metrics_agent_constants.go | 4 +- pkg/helper/self_metrics_plugin_constants.go | 62 ++++---- pkg/helper/self_metrics_runner_constants.go | 35 +++-- 60 files changed, 538 insertions(+), 375 deletions(-) create mode 100644 core/monitor/metric_constants/MetricCommonConstants.cpp create mode 100644 core/monitor/metric_constants/MetricCommonConstants.h diff --git a/core/common/compression/Compressor.cpp b/core/common/compression/Compressor.cpp index 050d62123b..134e6e4737 100644 --- a/core/common/compression/Compressor.cpp +++ b/core/common/compression/Compressor.cpp @@ -24,14 +24,14 @@ namespace logtail { void Compressor::SetMetricRecordRef(MetricLabels&& labels, DynamicMetricLabels&& dynamicLabels) { WriteMetrics::GetInstance()->PrepareMetricsRecordRef( - mMetricsRecordRef, std::move(labels), std::move(dynamicLabels)); + mMetricsRecordRef, MetricCategory::METRIC_CATEGORY_COMPONENT, std::move(labels), std::move(dynamicLabels)); mInItemsTotal = mMetricsRecordRef.CreateCounter(METRIC_COMPONENT_IN_ITEMS_TOTAL); mInItemSizeBytes = mMetricsRecordRef.CreateCounter(METRIC_COMPONENT_IN_SIZE_BYTES); mOutItemsTotal = mMetricsRecordRef.CreateCounter(METRIC_COMPONENT_OUT_ITEMS_TOTAL); mOutItemSizeBytes = mMetricsRecordRef.CreateCounter(METRIC_COMPONENT_OUT_SIZE_BYTES); mTotalProcessMs = mMetricsRecordRef.CreateTimeCounter(METRIC_COMPONENT_TOTAL_PROCESS_TIME_MS); mDiscardedItemsTotal = mMetricsRecordRef.CreateCounter(METRIC_COMPONENT_DISCARDED_ITEMS_TOTAL); - mDiscardedItemSizeBytes = mMetricsRecordRef.CreateCounter(METRIC_COMPONENT_DISCARDED_ITEMS_SIZE_BYTES); + 
mDiscardedItemSizeBytes = mMetricsRecordRef.CreateCounter(METRIC_COMPONENT_DISCARDED_SIZE_BYTES); } bool Compressor::DoCompress(const string& input, string& output, string& errorMsg) { diff --git a/core/common/compression/CompressorFactory.cpp b/core/common/compression/CompressorFactory.cpp index ed3fe2703f..c1d4ff0e78 100644 --- a/core/common/compression/CompressorFactory.cpp +++ b/core/common/compression/CompressorFactory.cpp @@ -15,9 +15,9 @@ #include "common/compression/CompressorFactory.h" #include "common/ParamExtractor.h" -#include "monitor/metric_constants/MetricConstants.h" #include "common/compression/LZ4Compressor.h" #include "common/compression/ZstdCompressor.h" +#include "monitor/metric_constants/MetricConstants.h" using namespace std; @@ -64,7 +64,6 @@ unique_ptr CompressorFactory::Create(const Json::Value& config, compressor->SetMetricRecordRef({{METRIC_LABEL_KEY_PROJECT, ctx.GetProjectName()}, {METRIC_LABEL_KEY_PIPELINE_NAME, ctx.GetConfigName()}, {METRIC_LABEL_KEY_COMPONENT_NAME, METRIC_LABEL_VALUE_COMPONENT_NAME_COMPRESSOR}, - {METRIC_LABEL_KEY_METRIC_CATEGORY, METRIC_LABEL_KEY_METRIC_CATEGORY_COMPONENT}, {METRIC_LABEL_KEY_FLUSHER_PLUGIN_ID, flusherId}}); return compressor; } diff --git a/core/container_manager/ContainerDiscoveryOptions.cpp b/core/container_manager/ContainerDiscoveryOptions.cpp index f9cdb6111a..ce59ef41f5 100644 --- a/core/container_manager/ContainerDiscoveryOptions.cpp +++ b/core/container_manager/ContainerDiscoveryOptions.cpp @@ -200,7 +200,7 @@ bool ContainerDiscoveryOptions::Init(const Json::Value& config, const PipelineCo void ContainerDiscoveryOptions::GenerateContainerMetaFetchingGoPipeline(Json::Value& res, const FileDiscoveryOptions* fileDiscovery, - const PluginInstance::PluginMeta pluginMeta) const { + const PluginInstance::PluginMeta& pluginMeta) const { Json::Value plugin(Json::objectValue); Json::Value detail(Json::objectValue); Json::Value object(Json::objectValue); diff --git 
a/core/container_manager/ContainerDiscoveryOptions.h b/core/container_manager/ContainerDiscoveryOptions.h index 1b1b06ccaf..2a106c819f 100644 --- a/core/container_manager/ContainerDiscoveryOptions.h +++ b/core/container_manager/ContainerDiscoveryOptions.h @@ -52,7 +52,7 @@ struct ContainerDiscoveryOptions { bool Init(const Json::Value& config, const PipelineContext& ctx, const std::string& pluginType); void GenerateContainerMetaFetchingGoPipeline(Json::Value& res, const FileDiscoveryOptions* fileDiscovery = nullptr, - const PluginInstance::PluginMeta pluginMeta = {"0"}) const; + const PluginInstance::PluginMeta& pluginMeta = {"0"}) const; }; using ContainerDiscoveryConfig = std::pair; diff --git a/core/ebpf/SelfMonitor.cpp b/core/ebpf/SelfMonitor.cpp index ad49fce33c..d352cfbaeb 100644 --- a/core/ebpf/SelfMonitor.cpp +++ b/core/ebpf/SelfMonitor.cpp @@ -179,7 +179,7 @@ void NetworkObserverSelfMonitor::HandleStatistic(nami::eBPFStatistics& stats) { eBPFSelfMonitorMgr::eBPFSelfMonitorMgr() : mSelfMonitors({}), mInited({}) {} -void eBPFSelfMonitorMgr::Init(const nami::PluginType type, std::shared_ptr mgr, const std::string& name, const std::string& logstore) { +void eBPFSelfMonitorMgr::Init(const nami::PluginType type, PluginMetricManagerPtr mgr, const std::string& name, const std::string& logstore) { if (mInited[int(type)]) return; WriteLock lk(mLock); diff --git a/core/ebpf/SelfMonitor.h b/core/ebpf/SelfMonitor.h index 4d59934266..4b8551f1ef 100644 --- a/core/ebpf/SelfMonitor.h +++ b/core/ebpf/SelfMonitor.h @@ -122,7 +122,7 @@ class FileSecuritySelfMonitor : public BaseBPFMonitor { class eBPFSelfMonitorMgr { public: eBPFSelfMonitorMgr(); - void Init(const nami::PluginType type, std::shared_ptr mgr, const std::string& name, const std::string& project); + void Init(const nami::PluginType type, PluginMetricManagerPtr mgr, const std::string& name, const std::string& project); void Release(const nami::PluginType type); void Suspend(const nami::PluginType type); void 
HandleStatistic(std::vector&& stats); diff --git a/core/ebpf/eBPFServer.cpp b/core/ebpf/eBPFServer.cpp index 874a29ec5a..501f833806 100644 --- a/core/ebpf/eBPFServer.cpp +++ b/core/ebpf/eBPFServer.cpp @@ -152,8 +152,8 @@ void eBPFServer::Init() { DynamicMetricLabels dynamicLabels; dynamicLabels.emplace_back(METRIC_LABEL_KEY_PROJECT, [this]() -> std::string { return this->GetAllProjects(); }); WriteMetrics::GetInstance()->PrepareMetricsRecordRef(mRef, - {{METRIC_LABEL_KEY_RUNNER_NAME, METRIC_LABEL_VALUE_RUNNER_NAME_EBPF_SERVER}, - {METRIC_LABEL_KEY_METRIC_CATEGORY, METRIC_LABEL_KEY_METRIC_CATEGORY_RUNNER}}, + MetricCategory::METRIC_CATEGORY_RUNNER, + {{METRIC_LABEL_KEY_RUNNER_NAME, METRIC_LABEL_VALUE_RUNNER_NAME_EBPF_SERVER}}, std::move(dynamicLabels)); mStartPluginTotal = mRef.CreateCounter(METRIC_RUNNER_EBPF_START_PLUGIN_TOTAL); @@ -202,7 +202,7 @@ void eBPFServer::Stop() { bool eBPFServer::StartPluginInternal(const std::string& pipeline_name, uint32_t plugin_index, nami::PluginType type, const logtail::PipelineContext* ctx, - const std::variant options, std::shared_ptr mgr) { + const std::variant options, PluginMetricManagerPtr mgr) { std::string prev_pipeline_name = CheckLoadedPipelineName(type); if (prev_pipeline_name.size() && prev_pipeline_name != pipeline_name) { @@ -314,7 +314,7 @@ bool eBPFServer::HasRegisteredPlugins() const { bool eBPFServer::EnablePlugin(const std::string& pipeline_name, uint32_t plugin_index, nami::PluginType type, const PipelineContext* ctx, - const std::variant options, std::shared_ptr mgr) { + const std::variant options, PluginMetricManagerPtr mgr) { if (!IsSupportedEnv(type)) { return false; } diff --git a/core/ebpf/eBPFServer.h b/core/ebpf/eBPFServer.h index 4887080a79..9141a9ec9a 100644 --- a/core/ebpf/eBPFServer.h +++ b/core/ebpf/eBPFServer.h @@ -72,7 +72,7 @@ class eBPFServer : public InputRunner { bool EnablePlugin(const std::string& pipeline_name, uint32_t plugin_index, nami::PluginType type, const logtail::PipelineContext* 
ctx, - const std::variant options, std::shared_ptr mgr); + const std::variant options, PluginMetricManagerPtr mgr); bool DisablePlugin(const std::string& pipeline_name, nami::PluginType type); @@ -88,7 +88,7 @@ class eBPFServer : public InputRunner { bool StartPluginInternal(const std::string& pipeline_name, uint32_t plugin_index, nami::PluginType type, const logtail::PipelineContext* ctx, - const std::variant options, std::shared_ptr mgr); + const std::variant options, PluginMetricManagerPtr mgr); eBPFServer() = default; ~eBPFServer() = default; diff --git a/core/file_server/FileServer.cpp b/core/file_server/FileServer.cpp index 5a072c6c0f..0741efefca 100644 --- a/core/file_server/FileServer.cpp +++ b/core/file_server/FileServer.cpp @@ -34,8 +34,8 @@ namespace logtail { FileServer::FileServer() { WriteMetrics::GetInstance()->PrepareMetricsRecordRef( mMetricsRecordRef, - {{METRIC_LABEL_KEY_RUNNER_NAME, METRIC_LABEL_VALUE_RUNNER_NAME_FILE_SERVER}, - {METRIC_LABEL_KEY_METRIC_CATEGORY, METRIC_LABEL_KEY_METRIC_CATEGORY_RUNNER}}); + MetricCategory::METRIC_CATEGORY_RUNNER, + {{METRIC_LABEL_KEY_RUNNER_NAME, METRIC_LABEL_VALUE_RUNNER_NAME_FILE_SERVER}}); } // 启动文件服务,包括加载配置、处理检查点、注册事件等 diff --git a/core/monitor/LogtailMetric.cpp b/core/monitor/LogtailMetric.cpp index e10c44fbb1..239f6f8c9c 100644 --- a/core/monitor/LogtailMetric.cpp +++ b/core/monitor/LogtailMetric.cpp @@ -25,11 +25,19 @@ using namespace sls_logs; namespace logtail { -const std::string LABEL_PREFIX = "label."; -const std::string VALUE_PREFIX = "value."; +const std::string METRIC_KEY_LABEL = "label"; +const std::string METRIC_KEY_VALUE = "value"; +const std::string METRIC_KEY_CATEGORY = "category"; +const std::string MetricCategory::METRIC_CATEGORY_UNKNOWN = "unknown"; +const std::string MetricCategory::METRIC_CATEGORY_AGENT = "agent"; +const std::string MetricCategory::METRIC_CATEGORY_RUNNER = "runner"; +const std::string MetricCategory::METRIC_CATEGORY_PIPELINE = "pipeline"; +const std::string 
MetricCategory::METRIC_CATEGORY_COMPONENT = "component"; +const std::string MetricCategory::METRIC_CATEGORY_PLUGIN = "plugin"; +const std::string MetricCategory::METRIC_CATEGORY_PLUGIN_SOURCE = "plugin_source"; -MetricsRecord::MetricsRecord(MetricLabelsPtr labels, DynamicMetricLabelsPtr dynamicLabels) - : mLabels(labels), mDynamicLabels(dynamicLabels), mDeleted(false) { +MetricsRecord::MetricsRecord(const std::string& category, MetricLabelsPtr labels, DynamicMetricLabelsPtr dynamicLabels) + : mCategory(category), mLabels(labels), mDynamicLabels(dynamicLabels), mDeleted(false) { } CounterPtr MetricsRecord::CreateCounter(const std::string& name) { @@ -64,6 +72,10 @@ bool MetricsRecord::IsDeleted() const { return mDeleted; } +const std::string& MetricsRecord::GetCategory() const { + return mCategory; +} + const MetricLabelsPtr& MetricsRecord::GetLabels() const { return mLabels; } @@ -89,7 +101,7 @@ const std::vector& MetricsRecord::GetDoubleGauges() const { } MetricsRecord* MetricsRecord::Collect() { - MetricsRecord* metrics = new MetricsRecord(mLabels, mDynamicLabels); + MetricsRecord* metrics = new MetricsRecord(mCategory, mLabels, mDynamicLabels); for (auto& item : mCounters) { CounterPtr newPtr(item->Collect()); metrics->mCounters.emplace_back(newPtr); @@ -127,6 +139,10 @@ void MetricsRecordRef::SetMetricsRecord(MetricsRecord* metricRecord) { mMetrics = metricRecord; } +const std::string& MetricsRecordRef::GetCategory() const { + return mMetrics->GetCategory(); +} + const MetricLabelsPtr& MetricsRecordRef::GetLabels() const { return mMetrics->GetLabels(); } @@ -171,8 +187,12 @@ bool MetricsRecordRef::HasLabel(const std::string& key, const std::string& value #endif // ReentrantMetricsRecord相关操作可以无锁,因为mCounters、mGauges只在初始化时会添加内容,后续只允许Get操作 -void ReentrantMetricsRecord::Init(MetricLabels& labels, std::unordered_map& metricKeys) { - WriteMetrics::GetInstance()->PrepareMetricsRecordRef(mMetricsRecordRef, std::move(labels)); +void ReentrantMetricsRecord::Init(const 
std::string& category, + MetricLabels& labels, + DynamicMetricLabels& dynamicLabels, + std::unordered_map& metricKeys) { + WriteMetrics::GetInstance()->PrepareMetricsRecordRef( + mMetricsRecordRef, category, std::move(labels), std::move(dynamicLabels)); for (auto metric : metricKeys) { switch (metric.second) { case MetricType::METRIC_TYPE_COUNTER: @@ -237,17 +257,19 @@ WriteMetrics::~WriteMetrics() { } void WriteMetrics::PrepareMetricsRecordRef(MetricsRecordRef& ref, + const std::string& category, MetricLabels&& labels, DynamicMetricLabels&& dynamicLabels) { - CreateMetricsRecordRef(ref, std::move(labels), std::move(dynamicLabels)); + CreateMetricsRecordRef(ref, category, std::move(labels), std::move(dynamicLabels)); CommitMetricsRecordRef(ref); } void WriteMetrics::CreateMetricsRecordRef(MetricsRecordRef& ref, + const std::string& category, MetricLabels&& labels, DynamicMetricLabels&& dynamicLabels) { - MetricsRecord* cur = new MetricsRecord(std::make_shared(labels), - std::make_shared(dynamicLabels)); + MetricsRecord* cur = new MetricsRecord( + category, std::make_shared(labels), std::make_shared(dynamicLabels)); ref.SetMetricsRecord(cur); } @@ -395,42 +417,53 @@ void ReadMetrics::ReadAsLogGroup(const std::string& regionFieldName, auto now = GetCurrentLogtailTime(); SetLogTime(logPtr, AppConfig::GetInstance()->EnableLogTimeAutoAdjust() ? 
now.tv_sec + GetTimeDelta() : now.tv_sec); - for (auto item = tmp->GetLabels()->begin(); item != tmp->GetLabels()->end(); ++item) { - std::pair pair = *item; - Log_Content* contentPtr = logPtr->add_contents(); - contentPtr->set_key(LABEL_PREFIX + pair.first); - contentPtr->set_value(pair.second); - } - for (auto item = tmp->GetDynamicLabels()->begin(); item != tmp->GetDynamicLabels()->end(); ++item) { - std::pair> pair = *item; - Log_Content* contentPtr = logPtr->add_contents(); - contentPtr->set_key(LABEL_PREFIX + pair.first); - contentPtr->set_value(pair.second()); - } - - for (auto& item : tmp->GetCounters()) { - CounterPtr counter = item; + { // category Log_Content* contentPtr = logPtr->add_contents(); - contentPtr->set_key(VALUE_PREFIX + counter->GetName()); - contentPtr->set_value(ToString(counter->GetValue())); + contentPtr->set_key(METRIC_KEY_CATEGORY); + contentPtr->set_value(tmp->GetCategory()); } - for (auto& item : tmp->GetTimeCounters()) { - TimeCounterPtr counter = item; - Log_Content* contentPtr = logPtr->add_contents(); - contentPtr->set_key(VALUE_PREFIX + counter->GetName()); - contentPtr->set_value(ToString(counter->GetValue())); - } - for (auto& item : tmp->GetIntGauges()) { - IntGaugePtr gauge = item; + { // label + Json::Value metricsRecordLabel; + for (auto item = tmp->GetLabels()->begin(); item != tmp->GetLabels()->end(); ++item) { + std::pair pair = *item; + metricsRecordLabel[pair.first] = pair.second; + } + for (auto item = tmp->GetDynamicLabels()->begin(); item != tmp->GetDynamicLabels()->end(); ++item) { + std::pair> pair = *item; + metricsRecordLabel[pair.first] = pair.second(); + } + Json::StreamWriterBuilder writer; + writer["indentation"] = ""; + std::string jsonString = Json::writeString(writer, metricsRecordLabel); Log_Content* contentPtr = logPtr->add_contents(); - contentPtr->set_key(VALUE_PREFIX + gauge->GetName()); - contentPtr->set_value(ToString(gauge->GetValue())); + contentPtr->set_key(METRIC_KEY_LABEL); + 
contentPtr->set_value(jsonString); } - for (auto& item : tmp->GetDoubleGauges()) { - DoubleGaugePtr gauge = item; - Log_Content* contentPtr = logPtr->add_contents(); - contentPtr->set_key(VALUE_PREFIX + gauge->GetName()); - contentPtr->set_value(ToString(gauge->GetValue())); + { // value + for (auto& item : tmp->GetCounters()) { + CounterPtr counter = item; + Log_Content* contentPtr = logPtr->add_contents(); + contentPtr->set_key(counter->GetName()); + contentPtr->set_value(ToString(counter->GetValue())); + } + for (auto& item : tmp->GetTimeCounters()) { + TimeCounterPtr counter = item; + Log_Content* contentPtr = logPtr->add_contents(); + contentPtr->set_key(counter->GetName()); + contentPtr->set_value(ToString(counter->GetValue())); + } + for (auto& item : tmp->GetIntGauges()) { + IntGaugePtr gauge = item; + Log_Content* contentPtr = logPtr->add_contents(); + contentPtr->set_key(gauge->GetName()); + contentPtr->set_value(ToString(gauge->GetValue())); + } + for (auto& item : tmp->GetDoubleGauges()) { + DoubleGaugePtr gauge = item; + Log_Content* contentPtr = logPtr->add_contents(); + contentPtr->set_key(gauge->GetName()); + contentPtr->set_value(ToString(gauge->GetValue())); + } } tmp = tmp->GetNext(); } @@ -443,40 +476,43 @@ void ReadMetrics::ReadAsFileBuffer(std::string& metricsContent) const { MetricsRecord* tmp = mHead; while (tmp) { - Json::Value metricsRecordValue; + Json::Value metricsRecordJson, metricsRecordLabel; auto now = GetCurrentLogtailTime(); - metricsRecordValue["time"] + metricsRecordJson["time"] = AppConfig::GetInstance()->EnableLogTimeAutoAdjust() ? 
now.tv_sec + GetTimeDelta() : now.tv_sec; + metricsRecordJson[METRIC_KEY_CATEGORY] = tmp->GetCategory(); + for (auto item = tmp->GetLabels()->begin(); item != tmp->GetLabels()->end(); ++item) { std::pair pair = *item; - metricsRecordValue[LABEL_PREFIX + pair.first] = pair.second; + metricsRecordLabel[pair.first] = pair.second; } for (auto item = tmp->GetDynamicLabels()->begin(); item != tmp->GetDynamicLabels()->end(); ++item) { std::pair> pair = *item; - metricsRecordValue[LABEL_PREFIX + pair.first] = pair.second(); + metricsRecordLabel[pair.first] = pair.second(); } + metricsRecordJson[METRIC_KEY_LABEL] = metricsRecordLabel; for (auto& item : tmp->GetCounters()) { CounterPtr counter = item; - metricsRecordValue[VALUE_PREFIX + counter->GetName()] = ToString(counter->GetValue()); + metricsRecordJson[counter->GetName()] = ToString(counter->GetValue()); } for (auto& item : tmp->GetTimeCounters()) { TimeCounterPtr counter = item; - metricsRecordValue[VALUE_PREFIX + counter->GetName()] = ToString(counter->GetValue()); + metricsRecordJson[counter->GetName()] = ToString(counter->GetValue()); } for (auto& item : tmp->GetIntGauges()) { IntGaugePtr gauge = item; - metricsRecordValue[VALUE_PREFIX + gauge->GetName()] = ToString(gauge->GetValue()); + metricsRecordJson[gauge->GetName()] = ToString(gauge->GetValue()); } for (auto& item : tmp->GetDoubleGauges()) { DoubleGaugePtr gauge = item; - metricsRecordValue[VALUE_PREFIX + gauge->GetName()] = ToString(gauge->GetValue()); + metricsRecordJson[gauge->GetName()] = ToString(gauge->GetValue()); } Json::StreamWriterBuilder writer; writer["indentation"] = ""; - std::string jsonString = Json::writeString(writer, metricsRecordValue); + std::string jsonString = Json::writeString(writer, metricsRecordJson); oss << jsonString << '\n'; tmp = tmp->GetNext(); diff --git a/core/monitor/LogtailMetric.h b/core/monitor/LogtailMetric.h index 1c02fa4814..2065f2184f 100644 --- a/core/monitor/LogtailMetric.h +++ b/core/monitor/LogtailMetric.h @@ 
-28,22 +28,39 @@ namespace logtail { +extern const std::string METRIC_KEY_LABEL; +extern const std::string METRIC_KEY_VALUE; +extern const std::string METRIC_KEY_CATEGORY; +class MetricCategory { +public: + static const std::string METRIC_CATEGORY_UNKNOWN; + static const std::string METRIC_CATEGORY_AGENT; + static const std::string METRIC_CATEGORY_RUNNER; + static const std::string METRIC_CATEGORY_PIPELINE; + static const std::string METRIC_CATEGORY_COMPONENT; + static const std::string METRIC_CATEGORY_PLUGIN; + static const std::string METRIC_CATEGORY_PLUGIN_SOURCE; +}; + class MetricsRecord { private: + std::string mCategory; MetricLabelsPtr mLabels; DynamicMetricLabelsPtr mDynamicLabels; - std::atomic_bool mDeleted; std::vector mCounters; std::vector mTimeCounters; std::vector mIntGauges; std::vector mDoubleGauges; + + std::atomic_bool mDeleted; MetricsRecord* mNext = nullptr; public: - MetricsRecord(MetricLabelsPtr labels, DynamicMetricLabelsPtr dynamicLabels = nullptr); + MetricsRecord(const std::string& category, MetricLabelsPtr labels, DynamicMetricLabelsPtr dynamicLabels = nullptr); MetricsRecord() = default; void MarkDeleted(); bool IsDeleted() const; + const std::string& GetCategory() const; const MetricLabelsPtr& GetLabels() const; const DynamicMetricLabelsPtr& GetDynamicLabels() const; const std::vector& GetCounters() const; @@ -75,6 +92,7 @@ class MetricsRecordRef { MetricsRecordRef(MetricsRecordRef&&) = delete; MetricsRecordRef& operator=(MetricsRecordRef&&) = delete; void SetMetricsRecord(MetricsRecord* metricRecord); + const std::string& GetCategory() const; const MetricLabelsPtr& GetLabels() const; const DynamicMetricLabelsPtr& GetDynamicLabels() const; CounterPtr CreateCounter(const std::string& name); @@ -114,7 +132,10 @@ class ReentrantMetricsRecord { std::unordered_map mDoubleGauges; public: - void Init(MetricLabels& labels, std::unordered_map& metricKeys); + void Init(const std::string& category, + MetricLabels& labels, + DynamicMetricLabels& 
dynamicLabels, + std::unordered_map& metricKeys); const MetricLabelsPtr& GetLabels() const; const DynamicMetricLabelsPtr& GetDynamicLabels() const; CounterPtr GetCounter(const std::string& name); @@ -140,9 +161,14 @@ class WriteMetrics { return ptr; } - void - PrepareMetricsRecordRef(MetricsRecordRef& ref, MetricLabels&& labels, DynamicMetricLabels&& dynamicLabels = {}); - void CreateMetricsRecordRef(MetricsRecordRef& ref, MetricLabels&& labels, DynamicMetricLabels&& dynamicLabels = {}); + void PrepareMetricsRecordRef(MetricsRecordRef& ref, + const std::string& category, + MetricLabels&& labels, + DynamicMetricLabels&& dynamicLabels = {}); + void CreateMetricsRecordRef(MetricsRecordRef& ref, + const std::string& category, + MetricLabels&& labels, + DynamicMetricLabels&& dynamicLabels = {}); void CommitMetricsRecordRef(MetricsRecordRef& ref); MetricsRecord* DoSnapshot(); diff --git a/core/monitor/MetricExportor.cpp b/core/monitor/MetricExportor.cpp index 5c4356389a..99c829d8d1 100644 --- a/core/monitor/MetricExportor.cpp +++ b/core/monitor/MetricExportor.cpp @@ -16,7 +16,6 @@ #include -#include "app_config/AppConfig.h" #include "LogFileProfiler.h" #include "LogtailMetric.h" #include "MetricConstants.h" @@ -35,7 +34,6 @@ DECLARE_FLAG_STRING(metrics_report_method); namespace logtail { -const string METRIC_REGION_FIELD_NAME = "region"; const string METRIC_REGION_DEFAULT = "default"; const string METRIC_SLS_LOGSTORE_NAME = "shennong_log_profile"; const string METRIC_TOPIC_TYPE = "loong_collector_metric"; @@ -66,7 +64,7 @@ void MetricExportor::PushCppMetrics() { if ("sls" == STRING_FLAG(metrics_report_method)) { std::map logGroupMap; - ReadMetrics::GetInstance()->ReadAsLogGroup(METRIC_REGION_FIELD_NAME, METRIC_REGION_DEFAULT, logGroupMap); + ReadMetrics::GetInstance()->ReadAsLogGroup(METRIC_LABEL_KEY_REGION, METRIC_REGION_DEFAULT, logGroupMap); SendToSLS(logGroupMap); } else if ("file" == STRING_FLAG(metrics_report_method)) { std::string metricsContent; @@ -175,10 
+173,10 @@ void MetricExportor::PushGoCppProvidedMetrics(std::vectorSetAgentGoMemory(std::stoi(metric.second)); } - if (metric.first == METRIC_AGENT_GO_ROUTINES_TOTAL) { + if (metric.first == METRIC_KEY_VALUE + "." + METRIC_AGENT_GO_ROUTINES_TOTAL) { LoongCollectorMonitor::GetInstance()->SetAgentGoRoutinesTotal(std::stoi(metric.second)); } LogtailMonitor::GetInstance()->UpdateMetric(metric.first, metric.second); @@ -193,15 +191,15 @@ void MetricExportor::SerializeGoDirectMetricsListToLogGroupMap( std::string configName = ""; std::string region = METRIC_REGION_DEFAULT; { - // get the config_name label + // get the pipeline_name label for (const auto& metric : metrics) { - if (metric.first == "label.config_name") { + if (metric.first == METRIC_KEY_LABEL + "." + METRIC_LABEL_KEY_PIPELINE_NAME) { configName = metric.second; break; } } if (!configName.empty()) { - // get region info by config_name + // get region info by pipeline_name shared_ptr p = PipelineManager::GetInstance()->FindConfigByName(configName); if (p) { FlusherSLS* pConfig = NULL; @@ -225,11 +223,32 @@ void MetricExportor::SerializeGoDirectMetricsListToLogGroupMap( auto now = GetCurrentLogtailTime(); SetLogTime(logPtr, AppConfig::GetInstance()->EnableLogTimeAutoAdjust() ? 
now.tv_sec + GetTimeDelta() : now.tv_sec); + + Json::Value metricsRecordLabel; for (const auto& metric : metrics) { + // category + if (metric.first.compare("label.metric_category") == 0) { + Log_Content* contentPtr = logPtr->add_contents(); + contentPtr->set_key(METRIC_KEY_CATEGORY); + contentPtr->set_value(metric.second); + continue; + } + // label + if (metric.first.compare(0, METRIC_KEY_LABEL.length(), METRIC_KEY_LABEL)) { + metricsRecordLabel[metric.first.substr(METRIC_KEY_LABEL.length() + 1)] = metric.second; + continue; + } + // value Log_Content* contentPtr = logPtr->add_contents(); contentPtr->set_key(metric.first); contentPtr->set_value(metric.second); } + Json::StreamWriterBuilder writer; + writer["indentation"] = ""; + std::string jsonString = Json::writeString(writer, metricsRecordLabel); + Log_Content* contentPtr = logPtr->add_contents(); + contentPtr->set_key(METRIC_KEY_LABEL); + contentPtr->set_value(jsonString); } } @@ -238,16 +257,25 @@ void MetricExportor::SerializeGoDirectMetricsListToString(std::vectorEnableLogTimeAutoAdjust() ? 
now.tv_sec + GetTimeDelta() : now.tv_sec; for (const auto& metric : metrics) { - metricsRecordValue[metric.first] = metric.second; + if (metric.first.compare("label.metric_category") == 0) { + metricsRecordJson[METRIC_KEY_CATEGORY] = metric.second; + continue; + } + if (metric.first.compare(0, METRIC_KEY_LABEL.length(), METRIC_KEY_LABEL) == 0) { + metricsRecordLabel[metric.first.substr(METRIC_KEY_LABEL.length() + 1)] = metric.second; + continue; + } + metricsRecordJson[metric.first.substr(METRIC_KEY_VALUE.length() + 1)] = metric.second; } + metricsRecordJson[METRIC_KEY_LABEL] = metricsRecordLabel; Json::StreamWriterBuilder writer; writer["indentation"] = ""; - std::string jsonString = Json::writeString(writer, metricsRecordValue); + std::string jsonString = Json::writeString(writer, metricsRecordJson); oss << jsonString << '\n'; } metricsContent = oss.str(); diff --git a/core/monitor/Monitor.cpp b/core/monitor/Monitor.cpp index 7eca928e46..3bb00943f1 100644 --- a/core/monitor/Monitor.cpp +++ b/core/monitor/Monitor.cpp @@ -693,7 +693,6 @@ LoongCollectorMonitor* LoongCollectorMonitor::GetInstance() { void LoongCollectorMonitor::Init() { // create metric record MetricLabels labels; - labels.emplace_back(METRIC_LABEL_KEY_METRIC_CATEGORY, METRIC_LABEL_KEY_METRIC_CATEGORY_AGENT); labels.emplace_back(METRIC_LABEL_KEY_INSTANCE_ID, Application::GetInstance()->GetInstanceId()); labels.emplace_back(METRIC_LABEL_KEY_START_TIME, LogFileProfiler::mStartTime); labels.emplace_back(METRIC_LABEL_KEY_OS, OS_NAME); @@ -710,7 +709,7 @@ void LoongCollectorMonitor::Init() { }); #endif WriteMetrics::GetInstance()->PrepareMetricsRecordRef( - mMetricsRecordRef, std::move(labels), std::move(dynamicLabels)); + mMetricsRecordRef, MetricCategory::METRIC_CATEGORY_AGENT, std::move(labels), std::move(dynamicLabels)); // init value mAgentCpu = mMetricsRecordRef.CreateDoubleGauge(METRIC_AGENT_CPU); mAgentMemory = mMetricsRecordRef.CreateIntGauge(METRIC_AGENT_MEMORY); diff --git 
a/core/monitor/PluginMetricManager.cpp b/core/monitor/PluginMetricManager.cpp index b47b5ddcad..f39506971e 100644 --- a/core/monitor/PluginMetricManager.cpp +++ b/core/monitor/PluginMetricManager.cpp @@ -17,7 +17,7 @@ namespace logtail { -ReentrantMetricsRecordRef PluginMetricManager::GetOrCreateReentrantMetricsRecordRef(MetricLabels labels) { +ReentrantMetricsRecordRef PluginMetricManager::GetOrCreateReentrantMetricsRecordRef(MetricLabels labels, DynamicMetricLabels dynamicLabels) { std::lock_guard lock(mutex); std::string key = GenerateKey(labels); @@ -31,7 +31,7 @@ ReentrantMetricsRecordRef PluginMetricManager::GetOrCreateReentrantMetricsRecord newLabels.insert(newLabels.end(), labels.begin(), labels.end()); ReentrantMetricsRecordRef ptr = std::make_shared(); - ptr->Init(newLabels, mMetricKeys); + ptr->Init(mDefaultCategory, newLabels, dynamicLabels, mMetricKeys); mReentrantMetricsRecordRefsMap.emplace(key, ptr); if (mSizeGauge != nullptr) { diff --git a/core/monitor/PluginMetricManager.h b/core/monitor/PluginMetricManager.h index db043fceda..dfa0a23ecb 100644 --- a/core/monitor/PluginMetricManager.h +++ b/core/monitor/PluginMetricManager.h @@ -23,10 +23,15 @@ namespace logtail { class PluginMetricManager { public: - PluginMetricManager(const MetricLabelsPtr defaultLabels, std::unordered_map metricKeys) - : mDefaultLabels(defaultLabels->begin(), defaultLabels->end()), mMetricKeys(metricKeys) {} - - ReentrantMetricsRecordRef GetOrCreateReentrantMetricsRecordRef(MetricLabels labels); + PluginMetricManager(const MetricLabelsPtr defaultLabels, + std::unordered_map metricKeys, + std::string category = MetricCategory::METRIC_CATEGORY_UNKNOWN) + : mDefaultLabels(defaultLabels->begin(), defaultLabels->end()), + mMetricKeys(metricKeys), + mDefaultCategory(category) {} + + ReentrantMetricsRecordRef GetOrCreateReentrantMetricsRecordRef(MetricLabels labels, + DynamicMetricLabels dynamicLabels = {}); void ReleaseReentrantMetricsRecordRef(MetricLabels labels); void 
RegisterSizeGauge(IntGaugePtr ptr) { mSizeGauge = ptr; } @@ -36,6 +41,7 @@ class PluginMetricManager { MetricLabels mDefaultLabels; std::unordered_map mMetricKeys; + std::string mDefaultCategory; std::unordered_map mReentrantMetricsRecordRefsMap; mutable std::mutex mutex; diff --git a/core/monitor/metric_constants/AgentMetrics.cpp b/core/monitor/metric_constants/AgentMetrics.cpp index f2e49630ac..ad17130e6c 100644 --- a/core/monitor/metric_constants/AgentMetrics.cpp +++ b/core/monitor/metric_constants/AgentMetrics.cpp @@ -19,7 +19,6 @@ using namespace std; namespace logtail { // label keys -const string METRIC_LABEL_KEY_METRIC_CATEGORY = "metric_category"; const string METRIC_LABEL_KEY_ALIUIDS = "aliuids"; const string METRIC_LABEL_KEY_INSTANCE_ID = "instance_id"; const string METRIC_LABEL_KEY_START_TIME = "start_time"; @@ -30,16 +29,13 @@ const string METRIC_LABEL_KEY_USER_DEFINED_ID = "user_defined_id"; const string METRIC_LABEL_KEY_UUID = "uuid"; const string METRIC_LABEL_KEY_VERSION = "version"; -// label values -const string METRIC_LABEL_KEY_METRIC_CATEGORY_AGENT = "agent"; - // metric keys -const string METRIC_AGENT_CPU = "agent_cpu_percent"; -const string METRIC_AGENT_GO_ROUTINES_TOTAL = "agent_go_routines_total"; -const string METRIC_AGENT_INSTANCE_CONFIG_TOTAL = "agent_instance_config_total"; // Not Implemented -const string METRIC_AGENT_MEMORY = "agent_memory_used_mb"; -const string METRIC_AGENT_MEMORY_GO = "agent_go_memory_used_mb"; -const string METRIC_AGENT_OPEN_FD_TOTAL = "agent_open_fd_total"; -const string METRIC_AGENT_PIPELINE_CONFIG_TOTAL = "agent_pipeline_config_total"; +const string METRIC_AGENT_CPU = "cpu"; +const string METRIC_AGENT_GO_ROUTINES_TOTAL = "go_routines_total"; +const string METRIC_AGENT_INSTANCE_CONFIG_TOTAL = "instance_config_total"; // Not Implemented +const string METRIC_AGENT_MEMORY = "memory_used_mb"; +const string METRIC_AGENT_MEMORY_GO = "go_memory_used_mb"; +const string METRIC_AGENT_OPEN_FD_TOTAL = "open_fd_total"; +const 
string METRIC_AGENT_PIPELINE_CONFIG_TOTAL = "pipeline_config_total"; } // namespace logtail diff --git a/core/monitor/metric_constants/ComponentMetrics.cpp b/core/monitor/metric_constants/ComponentMetrics.cpp index cbbb8ed40b..358a7a6a36 100644 --- a/core/monitor/metric_constants/ComponentMetrics.cpp +++ b/core/monitor/metric_constants/ComponentMetrics.cpp @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include "MetricCommonConstants.h" #include "MetricConstants.h" using namespace std; @@ -34,7 +35,6 @@ const string METRIC_LABEL_KEY_EXACTLY_ONCE_ENABLED = "exactly_once_enabled"; const string METRIC_LABEL_KEY_GROUP_BATCH_ENABLED = "group_batch_enabled"; // label values -const string METRIC_LABEL_KEY_METRIC_CATEGORY_COMPONENT = "component"; const string METRIC_LABEL_VALUE_COMPONENT_NAME_BATCHER = "batcher"; const string METRIC_LABEL_VALUE_COMPONENT_NAME_COMPRESSOR = "compressor"; const string METRIC_LABEL_VALUE_COMPONENT_NAME_PROCESS_QUEUE = "process_queue"; @@ -43,46 +43,42 @@ const string METRIC_LABEL_VALUE_COMPONENT_NAME_SENDER_QUEUE = "sender_queue"; const string METRIC_LABEL_VALUE_COMPONENT_NAME_SERIALIZER = "serializer"; // metric keys -const string METRIC_COMPONENT_IN_EVENTS_TOTAL = "component_in_events_total"; -const string METRIC_COMPONENT_IN_SIZE_BYTES = "component_in_size_bytes"; -const string METRIC_COMPONENT_IN_ITEMS_TOTAL = "component_in_items_total"; -const string METRIC_COMPONENT_OUT_EVENTS_TOTAL = "component_out_events_total"; -const string METRIC_COMPONENT_OUT_ITEMS_TOTAL = "component_out_items_total"; -const string METRIC_COMPONENT_OUT_SIZE_BYTES = "component_out_size_bytes"; -const string METRIC_COMPONENT_TOTAL_DELAY_MS = "component_total_delay_ms"; -const string METRIC_COMPONENT_TOTAL_PROCESS_TIME_MS = "component_total_process_time_ms"; -const string METRIC_COMPONENT_DISCARDED_ITEMS_TOTAL = "component_discarded_items_total"; -const string 
METRIC_COMPONENT_DISCARDED_ITEMS_SIZE_BYTES = "component_discarded_item_size_bytes"; +const string& METRIC_COMPONENT_IN_EVENTS_TOTAL = METRIC_IN_EVENTS_TOTAL; +const string& METRIC_COMPONENT_IN_SIZE_BYTES = METRIC_IN_SIZE_BYTES; +const string& METRIC_COMPONENT_IN_ITEMS_TOTAL = METRIC_IN_ITEMS_TOTAL; +const string& METRIC_COMPONENT_OUT_EVENTS_TOTAL = METRIC_OUT_EVENTS_TOTAL; +const string& METRIC_COMPONENT_OUT_ITEMS_TOTAL = METRIC_OUT_ITEMS_TOTAL; +const string& METRIC_COMPONENT_OUT_SIZE_BYTES = METRIC_OUT_SIZE_BYTES; +const string& METRIC_COMPONENT_TOTAL_DELAY_MS = METRIC_TOTAL_DELAY_MS; +const string& METRIC_COMPONENT_TOTAL_PROCESS_TIME_MS = METRIC_TOTAL_PROCESS_TIME_MS; +const string& METRIC_COMPONENT_DISCARDED_ITEMS_TOTAL = METRIC_DISCARDED_ITEMS_TOTAL; +const string& METRIC_COMPONENT_DISCARDED_SIZE_BYTES = METRIC_DISCARDED_SIZE_BYTES; /********************************************************** * batcher **********************************************************/ -const string METRIC_COMPONENT_BATCHER_EVENT_BATCHES_TOTAL = "component_event_batches_total"; -const string METRIC_COMPONENT_BATCHER_BUFFERED_GROUPS_TOTAL = "component_buffered_groups_total"; -const string METRIC_COMPONENT_BATCHER_BUFFERED_EVENTS_TOTAL = "component_buffered_events_total"; -const string METRIC_COMPONENT_BATCHER_BUFFERED_SIZE_BYTES = "component_buffered_size_bytes"; -const string METRIC_COMPONENT_BATCHER_TOTAL_ADD_TIME_MS = "component_total_add_time_ms"; +const string METRIC_COMPONENT_BATCHER_EVENT_BATCHES_TOTAL = "event_batches_total"; +const string METRIC_COMPONENT_BATCHER_BUFFERED_GROUPS_TOTAL = "buffered_groups_total"; +const string METRIC_COMPONENT_BATCHER_BUFFERED_EVENTS_TOTAL = "buffered_events_total"; +const string METRIC_COMPONENT_BATCHER_BUFFERED_SIZE_BYTES = "buffered_size_bytes"; +const string METRIC_COMPONENT_BATCHER_TOTAL_ADD_TIME_MS = "total_add_time_ms"; /********************************************************** * queue 
**********************************************************/ -const string METRIC_COMPONENT_QUEUE_SIZE = "component_queue_size"; -const string METRIC_COMPONENT_QUEUE_SIZE_BYTES = "component_queue_size_bytes"; -const string METRIC_COMPONENT_QUEUE_VALID_TO_PUSH_FLAG = "component_valid_to_push_status"; -const string METRIC_COMPONENT_QUEUE_EXTRA_BUFFER_SIZE = "component_extra_buffer_size"; -const string METRIC_COMPONENT_QUEUE_EXTRA_BUFFER_SIZE_BYTES = "component_extra_buffer_size_bytes"; -const string METRIC_COMPONENT_QUEUE_DISCARDED_EVENTS_TOTAL = "component_discarded_events_total"; +const string METRIC_COMPONENT_QUEUE_SIZE = "queue_size"; +const string METRIC_COMPONENT_QUEUE_SIZE_BYTES = "queue_size_bytes"; +const string METRIC_COMPONENT_QUEUE_VALID_TO_PUSH_FLAG = "valid_to_push_status"; +const string METRIC_COMPONENT_QUEUE_EXTRA_BUFFER_SIZE = "extra_buffer_size"; +const string METRIC_COMPONENT_QUEUE_EXTRA_BUFFER_SIZE_BYTES = "extra_buffer_size_bytes"; +const string& METRIC_COMPONENT_QUEUE_DISCARDED_EVENTS_TOTAL = METRIC_DISCARDED_EVENTS_TOTAL; -const string METRIC_COMPONENT_QUEUE_FETCHED_ITEMS_TOTAL = "component_fetched_items_total"; -const string METRIC_COMPONENT_QUEUE_FETCH_TIMES_TOTAL = "component_fetch_times_total"; -const string METRIC_COMPONENT_QUEUE_VALID_FETCH_TIMES_TOTAL = "component_valid_fetch_times_total"; -const string METRIC_COMPONENT_QUEUE_FETCH_REJECTED_BY_REGION_LIMITER_TIMES_TOTAL - = "component_fetch_rejected_by_region_limiter_times_total"; -const string METRIC_COMPONENT_QUEUE_FETCH_REJECTED_BY_PROJECT_LIMITER_TIMES_TOTAL - = "component_fetch_rejected_by_project_limiter_times_total"; -const string METRIC_COMPONENT_QUEUE_FETCH_REJECTED_BY_LOGSTORE_LIMITER_TIMES_TOTAL - = "component_fetch_rejected_by_logstore_limiter_times_total"; -const string METRIC_COMPONENT_QUEUE_FETCH_REJECTED_BY_RATE_LIMITER_TIMES_TOTAL - = "component_fetch_rejected_by_rate_limiter_times_total"; +const string METRIC_COMPONENT_QUEUE_FETCHED_ITEMS_TOTAL = "fetched_items_total"; 
+const string METRIC_COMPONENT_QUEUE_FETCH_TIMES_TOTAL = "fetch_times_total"; +const string METRIC_COMPONENT_QUEUE_VALID_FETCH_TIMES_TOTAL = "valid_fetch_times_total"; +const string METRIC_COMPONENT_QUEUE_FETCH_REJECTED_BY_REGION_LIMITER_TIMES_TOTAL = "region_reject_times_total"; +const string METRIC_COMPONENT_QUEUE_FETCH_REJECTED_BY_PROJECT_LIMITER_TIMES_TOTAL = "project_reject_times_total"; +const string METRIC_COMPONENT_QUEUE_FETCH_REJECTED_BY_LOGSTORE_LIMITER_TIMES_TOTAL = "logstore_reject_times_total"; +const string METRIC_COMPONENT_QUEUE_FETCH_REJECTED_BY_RATE_LIMITER_TIMES_TOTAL = "rate_reject_times_total"; } // namespace logtail diff --git a/core/monitor/metric_constants/MetricCommonConstants.cpp b/core/monitor/metric_constants/MetricCommonConstants.cpp new file mode 100644 index 0000000000..a2a1e3542d --- /dev/null +++ b/core/monitor/metric_constants/MetricCommonConstants.cpp @@ -0,0 +1,35 @@ +// Copyright 2024 iLogtail Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "MetricCommonConstants.h" + +using namespace std; + +namespace logtail { + +const string METRIC_DISCARDED_EVENTS_TOTAL = "discarded_events_total"; +const string METRIC_DISCARDED_ITEMS_TOTAL = "discarded_items_total"; +const string METRIC_DISCARDED_SIZE_BYTES = "discarded_size_bytes"; +const string METRIC_IN_EVENTS_TOTAL = "in_events_total"; +const string METRIC_IN_EVENT_GROUPS_TOTAL = "in_event_groups_total"; +const string METRIC_IN_ITEMS_TOTAL = "in_items_total"; +const string METRIC_IN_SIZE_BYTES = "in_size_bytes"; +const string METRIC_OUT_EVENTS_TOTAL = "out_events_total"; +const string METRIC_OUT_EVENT_GROUPS_TOTAL = "out_event_groups_total"; +const string METRIC_OUT_ITEMS_TOTAL = "out_items_total"; +const string METRIC_OUT_SIZE_BYTES = "out_size_bytes"; +const string METRIC_TOTAL_DELAY_MS = "total_delay_ms"; +const string METRIC_TOTAL_PROCESS_TIME_MS = "total_process_time_ms"; + +} \ No newline at end of file diff --git a/core/monitor/metric_constants/MetricCommonConstants.h b/core/monitor/metric_constants/MetricCommonConstants.h new file mode 100644 index 0000000000..4a78b4e9d1 --- /dev/null +++ b/core/monitor/metric_constants/MetricCommonConstants.h @@ -0,0 +1,36 @@ +/* + * Copyright 2024 iLogtail Authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once +#include + +namespace logtail { + +extern const std::string METRIC_DISCARDED_EVENTS_TOTAL; +extern const std::string METRIC_DISCARDED_ITEMS_TOTAL; +extern const std::string METRIC_DISCARDED_SIZE_BYTES; +extern const std::string METRIC_IN_EVENTS_TOTAL; +extern const std::string METRIC_IN_EVENT_GROUPS_TOTAL; +extern const std::string METRIC_IN_ITEMS_TOTAL; +extern const std::string METRIC_IN_SIZE_BYTES; +extern const std::string METRIC_OUT_EVENTS_TOTAL; +extern const std::string METRIC_OUT_EVENT_GROUPS_TOTAL; +extern const std::string METRIC_OUT_ITEMS_TOTAL; +extern const std::string METRIC_OUT_SIZE_BYTES; +extern const std::string METRIC_TOTAL_DELAY_MS; +extern const std::string METRIC_TOTAL_PROCESS_TIME_MS; + +} \ No newline at end of file diff --git a/core/monitor/metric_constants/MetricConstants.h b/core/monitor/metric_constants/MetricConstants.h index 84740a09b9..73a62d30bf 100644 --- a/core/monitor/metric_constants/MetricConstants.h +++ b/core/monitor/metric_constants/MetricConstants.h @@ -19,16 +19,6 @@ namespace logtail { -// label keys -extern const std::string METRIC_LABEL_KEY_METRIC_CATEGORY; - -// label values -extern const std::string METRIC_LABEL_KEY_METRIC_CATEGORY_AGENT; -extern const std::string METRIC_LABEL_KEY_METRIC_CATEGORY_COMPONENT; -extern const std::string METRIC_LABEL_KEY_METRIC_CATEGORY_PIPELINE; -extern const std::string METRIC_LABEL_KEY_METRIC_CATEGORY_PLUGIN; -extern const std::string METRIC_LABEL_KEY_METRIC_CATEGORY_RUNNER; - ////////////////////////////////////////////////////////////////////////// // agent ////////////////////////////////////////////////////////////////////////// @@ -81,14 +71,14 @@ extern const std::string METRIC_LABEL_KEY_PLUGIN_ID; extern const std::string METRIC_LABEL_KEY_PLUGIN_TYPE; // metric keys -extern const std::string METRIC_PLUGIN_IN_EVENTS_TOTAL; -extern const std::string METRIC_PLUGIN_IN_EVENT_GROUPS_TOTAL; -extern const std::string METRIC_PLUGIN_IN_SIZE_BYTES; -extern const 
std::string METRIC_PLUGIN_OUT_EVENTS_TOTAL; -extern const std::string METRIC_PLUGIN_OUT_EVENT_GROUPS_TOTAL; -extern const std::string METRIC_PLUGIN_OUT_SIZE_BYTES; -extern const std::string METRIC_PLUGIN_TOTAL_DELAY_MS; -extern const std::string METRIC_PLUGIN_TOTAL_PROCESS_TIME_MS; +extern const std::string& METRIC_PLUGIN_IN_EVENTS_TOTAL; +extern const std::string& METRIC_PLUGIN_IN_EVENT_GROUPS_TOTAL; +extern const std::string& METRIC_PLUGIN_IN_SIZE_BYTES; +extern const std::string& METRIC_PLUGIN_OUT_EVENTS_TOTAL; +extern const std::string& METRIC_PLUGIN_OUT_EVENT_GROUPS_TOTAL; +extern const std::string& METRIC_PLUGIN_OUT_SIZE_BYTES; +extern const std::string& METRIC_PLUGIN_TOTAL_DELAY_MS; +extern const std::string& METRIC_PLUGIN_TOTAL_PROCESS_TIME_MS; /********************************************************** * input_file @@ -159,7 +149,7 @@ extern const std::string METRIC_PLUGIN_EBPF_PROCESS_CACHE_MISS_TOTAL; /********************************************************** * all processor (所有解析类的处理插件通用指标。Todo:目前统计还不全、不准确) **********************************************************/ -extern const std::string METRIC_PLUGIN_DISCARDED_EVENTS_TOTAL; +extern const std::string& METRIC_PLUGIN_DISCARDED_EVENTS_TOTAL; extern const std::string METRIC_PLUGIN_OUT_FAILED_EVENTS_TOTAL; extern const std::string METRIC_PLUGIN_OUT_KEY_NOT_FOUND_EVENTS_TOTAL; extern const std::string METRIC_PLUGIN_OUT_SUCCESSFUL_EVENTS_TOTAL; @@ -230,16 +220,16 @@ extern const std::string METRIC_LABEL_VALUE_COMPONENT_NAME_SENDER_QUEUE; extern const std::string METRIC_LABEL_VALUE_COMPONENT_NAME_SERIALIZER; // metric keys -extern const std::string METRIC_COMPONENT_IN_EVENTS_TOTAL; -extern const std::string METRIC_COMPONENT_IN_SIZE_BYTES; -extern const std::string METRIC_COMPONENT_IN_ITEMS_TOTAL; -extern const std::string METRIC_COMPONENT_OUT_EVENTS_TOTAL; -extern const std::string METRIC_COMPONENT_OUT_ITEMS_TOTAL; -extern const std::string METRIC_COMPONENT_OUT_SIZE_BYTES; -extern const std::string 
METRIC_COMPONENT_TOTAL_DELAY_MS; -extern const std::string METRIC_COMPONENT_TOTAL_PROCESS_TIME_MS; -extern const std::string METRIC_COMPONENT_DISCARDED_ITEMS_TOTAL; -extern const std::string METRIC_COMPONENT_DISCARDED_ITEMS_SIZE_BYTES; +extern const std::string& METRIC_COMPONENT_IN_EVENTS_TOTAL; +extern const std::string& METRIC_COMPONENT_IN_SIZE_BYTES; +extern const std::string& METRIC_COMPONENT_IN_ITEMS_TOTAL; +extern const std::string& METRIC_COMPONENT_OUT_EVENTS_TOTAL; +extern const std::string& METRIC_COMPONENT_OUT_ITEMS_TOTAL; +extern const std::string& METRIC_COMPONENT_OUT_SIZE_BYTES; +extern const std::string& METRIC_COMPONENT_TOTAL_DELAY_MS; +extern const std::string& METRIC_COMPONENT_TOTAL_PROCESS_TIME_MS; +extern const std::string& METRIC_COMPONENT_DISCARDED_ITEMS_TOTAL; +extern const std::string& METRIC_COMPONENT_DISCARDED_SIZE_BYTES; /********************************************************** * batcher @@ -258,7 +248,7 @@ extern const std::string METRIC_COMPONENT_QUEUE_SIZE_BYTES; extern const std::string METRIC_COMPONENT_QUEUE_VALID_TO_PUSH_FLAG; extern const std::string METRIC_COMPONENT_QUEUE_EXTRA_BUFFER_SIZE; extern const std::string METRIC_COMPONENT_QUEUE_EXTRA_BUFFER_SIZE_BYTES; -extern const std::string METRIC_COMPONENT_QUEUE_DISCARDED_EVENTS_TOTAL; +extern const std::string& METRIC_COMPONENT_QUEUE_DISCARDED_EVENTS_TOTAL; extern const std::string METRIC_COMPONENT_QUEUE_FETCHED_ITEMS_TOTAL; extern const std::string METRIC_COMPONENT_QUEUE_FETCH_TIMES_TOTAL; @@ -274,6 +264,7 @@ extern const std::string METRIC_COMPONENT_QUEUE_FETCH_REJECTED_BY_RATE_LIMITER_T // label keys extern const std::string METRIC_LABEL_KEY_RUNNER_NAME; +extern const std::string METRIC_LABEL_KEY_THREAD_NO; // label values extern const std::string METRIC_LABEL_VALUE_RUNNER_NAME_FILE_SERVER; @@ -284,22 +275,26 @@ extern const std::string METRIC_LABEL_VALUE_RUNNER_NAME_PROMETHEUS; extern const std::string METRIC_LABEL_VALUE_RUNNER_NAME_EBPF_SERVER; // metric keys -extern const 
std::string METRIC_RUNNER_IN_EVENTS_TOTAL; -extern const std::string METRIC_RUNNER_IN_EVENT_GROUPS_TOTAL; -extern const std::string METRIC_RUNNER_IN_SIZE_BYTES; -extern const std::string METRIC_RUNNER_IN_ITEMS_TOTAL; +extern const std::string& METRIC_RUNNER_IN_EVENTS_TOTAL; +extern const std::string& METRIC_RUNNER_IN_EVENT_GROUPS_TOTAL; +extern const std::string& METRIC_RUNNER_IN_SIZE_BYTES; +extern const std::string& METRIC_RUNNER_IN_ITEMS_TOTAL; extern const std::string METRIC_RUNNER_LAST_RUN_TIME; -extern const std::string METRIC_RUNNER_OUT_ITEMS_TOTAL; -extern const std::string METRIC_RUNNER_TOTAL_DELAY_MS; +extern const std::string& METRIC_RUNNER_OUT_ITEMS_TOTAL; +extern const std::string& METRIC_RUNNER_TOTAL_DELAY_MS; +extern const std::string METRIC_RUNNER_CLIENT_REGISTER_STATE; +extern const std::string METRIC_RUNNER_CLIENT_REGISTER_RETRY_TOTAL; +extern const std::string METRIC_RUNNER_JOBS_TOTAL; + +/********************************************************** + * all sinks + **********************************************************/ extern const std::string METRIC_RUNNER_SINK_OUT_SUCCESSFUL_ITEMS_TOTAL; extern const std::string METRIC_RUNNER_SINK_OUT_FAILED_ITEMS_TOTAL; extern const std::string METRIC_RUNNER_SINK_SUCCESSFUL_ITEM_TOTAL_RESPONSE_TIME_MS; extern const std::string METRIC_RUNNER_SINK_FAILED_ITEM_TOTAL_RESPONSE_TIME_MS; extern const std::string METRIC_RUNNER_SINK_SENDING_ITEMS_TOTAL; extern const std::string METRIC_RUNNER_SINK_SEND_CONCURRENCY; -extern const std::string METRIC_RUNNER_CLIENT_REGISTER_STATE; -extern const std::string METRIC_RUNNER_CLIENT_REGISTER_RETRY_TOTAL; -extern const std::string METRIC_RUNNER_JOBS_TOTAL; /********************************************************** * flusher runner diff --git a/core/monitor/metric_constants/PipelineMetrics.cpp b/core/monitor/metric_constants/PipelineMetrics.cpp index 85fd7f3e8f..b82904101c 100644 --- a/core/monitor/metric_constants/PipelineMetrics.cpp +++ 
b/core/monitor/metric_constants/PipelineMetrics.cpp @@ -23,18 +23,15 @@ const string METRIC_LABEL_KEY_LOGSTORE = "logstore"; const string METRIC_LABEL_KEY_PIPELINE_NAME = "pipeline_name"; const string METRIC_LABEL_KEY_REGION = "region"; -// label values -const string METRIC_LABEL_KEY_METRIC_CATEGORY_PIPELINE = "pipeline"; - // metric keys -const string METRIC_PIPELINE_PROCESSORS_IN_EVENTS_TOTAL = "pipeline_processors_in_events_total"; -const string METRIC_PIPELINE_PROCESSORS_IN_EVENT_GROUPS_TOTAL = "pipeline_processors_in_event_groups_total"; -const string METRIC_PIPELINE_PROCESSORS_IN_SIZE_BYTES = "pipeline_processors_in_size_bytes"; -const string METRIC_PIPELINE_PROCESSORS_TOTAL_PROCESS_TIME_MS = "pipeline_processors_total_process_time_ms"; -const string METRIC_PIPELINE_FLUSHERS_IN_EVENTS_TOTAL = "pipeline_flushers_in_events_total"; -const string METRIC_PIPELINE_FLUSHERS_IN_EVENT_GROUPS_TOTAL = "pipeline_flushers_in_event_groups_total"; -const string METRIC_PIPELINE_FLUSHERS_IN_SIZE_BYTES = "pipeline_flushers_in_size_bytes"; -const string METRIC_PIPELINE_FLUSHERS_TOTAL_PACKAGE_TIME_MS = "pipeline_flushers_total_package_time_ms"; -const string METRIC_PIPELINE_START_TIME = "pipeline_start_time"; +const string METRIC_PIPELINE_PROCESSORS_IN_EVENTS_TOTAL = "processor_in_events_total"; +const string METRIC_PIPELINE_PROCESSORS_IN_EVENT_GROUPS_TOTAL = "processor_in_event_groups_total"; +const string METRIC_PIPELINE_PROCESSORS_IN_SIZE_BYTES = "processor_in_size_bytes"; +const string METRIC_PIPELINE_PROCESSORS_TOTAL_PROCESS_TIME_MS = "processor_total_process_time_ms"; +const string METRIC_PIPELINE_FLUSHERS_IN_EVENTS_TOTAL = "flusher_in_events_total"; +const string METRIC_PIPELINE_FLUSHERS_IN_EVENT_GROUPS_TOTAL = "flusher_in_event_groups_total"; +const string METRIC_PIPELINE_FLUSHERS_IN_SIZE_BYTES = "flusher_in_size_bytes"; +const string METRIC_PIPELINE_FLUSHERS_TOTAL_PACKAGE_TIME_MS = "flusher_total_package_time_ms"; +const string METRIC_PIPELINE_START_TIME = "start_time"; 
} // namespace logtail diff --git a/core/monitor/metric_constants/PluginMetrics.cpp b/core/monitor/metric_constants/PluginMetrics.cpp index 448a551cae..329e487539 100644 --- a/core/monitor/metric_constants/PluginMetrics.cpp +++ b/core/monitor/metric_constants/PluginMetrics.cpp @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include "MetricCommonConstants.h" #include "MetricConstants.h" using namespace std; @@ -22,18 +23,15 @@ namespace logtail { const string METRIC_LABEL_KEY_PLUGIN_ID = "plugin_id"; const string METRIC_LABEL_KEY_PLUGIN_TYPE = "plugin_type"; -// label values -const string METRIC_LABEL_KEY_METRIC_CATEGORY_PLUGIN = "plugin"; - // metric keys -const string METRIC_PLUGIN_IN_EVENTS_TOTAL = "plugin_in_events_total"; -const string METRIC_PLUGIN_IN_EVENT_GROUPS_TOTAL = "plugin_in_event_groups_total"; -const string METRIC_PLUGIN_IN_SIZE_BYTES = "plugin_in_size_bytes"; -const string METRIC_PLUGIN_OUT_EVENTS_TOTAL = "plugin_out_events_total"; -const string METRIC_PLUGIN_OUT_EVENT_GROUPS_TOTAL = "plugin_out_event_groups_total"; -const string METRIC_PLUGIN_OUT_SIZE_BYTES = "plugin_out_size_bytes"; -const string METRIC_PLUGIN_TOTAL_DELAY_MS = "plugin_total_delay_ms"; -const string METRIC_PLUGIN_TOTAL_PROCESS_TIME_MS = "plugin_total_process_time_ms"; +const string& METRIC_PLUGIN_IN_EVENTS_TOTAL = METRIC_IN_EVENTS_TOTAL; +const string& METRIC_PLUGIN_IN_EVENT_GROUPS_TOTAL = METRIC_IN_EVENT_GROUPS_TOTAL; +const string& METRIC_PLUGIN_IN_SIZE_BYTES = METRIC_IN_SIZE_BYTES; +const string& METRIC_PLUGIN_OUT_EVENTS_TOTAL = METRIC_OUT_EVENTS_TOTAL; +const string& METRIC_PLUGIN_OUT_EVENT_GROUPS_TOTAL = METRIC_OUT_EVENT_GROUPS_TOTAL; +const string& METRIC_PLUGIN_OUT_SIZE_BYTES = METRIC_OUT_SIZE_BYTES; +const string& METRIC_PLUGIN_TOTAL_DELAY_MS = METRIC_TOTAL_DELAY_MS; +const string& METRIC_PLUGIN_TOTAL_PROCESS_TIME_MS = METRIC_TOTAL_PROCESS_TIME_MS; 
/********************************************************** * input_file @@ -43,9 +41,9 @@ const string METRIC_LABEL_KEY_FILE_DEV = "file_dev"; const string METRIC_LABEL_KEY_FILE_INODE = "file_inode"; const string METRIC_LABEL_KEY_FILE_NAME = "file_name"; -const string METRIC_PLUGIN_MONITOR_FILE_TOTAL = "plugin_monitor_file_total"; -const string METRIC_PLUGIN_SOURCE_READ_OFFSET_BYTES = "plugin_source_read_offset_bytes"; -const string METRIC_PLUGIN_SOURCE_SIZE_BYTES = "plugin_source_size_bytes"; +const string METRIC_PLUGIN_MONITOR_FILE_TOTAL = "monitor_file_total"; +const string METRIC_PLUGIN_SOURCE_READ_OFFSET_BYTES = "read_offset_bytes"; +const string METRIC_PLUGIN_SOURCE_SIZE_BYTES = "size_bytes"; /********************************************************** * input_prometheus @@ -57,11 +55,11 @@ const std::string METRIC_LABEL_KEY_SERVICE_PORT = "service_port"; const std::string METRIC_LABEL_KEY_STATUS = "status"; const std::string METRIC_LABEL_KEY_INSTANCE = "instance"; -const std::string METRIC_PLUGIN_PROM_SUBSCRIBE_TARGETS = "plugin_prom_subscribe_targets"; -const std::string METRIC_PLUGIN_PROM_SUBSCRIBE_TOTAL = "plugin_prom_subscribe_total"; -const std::string METRIC_PLUGIN_PROM_SUBSCRIBE_TIME_MS = "plugin_prom_subscribe_time_ms"; -const std::string METRIC_PLUGIN_PROM_SCRAPE_TIME_MS = "plugin_prom_scrape_time_ms"; -const std::string METRIC_PLUGIN_PROM_SCRAPE_DELAY_TOTAL = "plugin_prom_scrape_delay_total"; +const std::string METRIC_PLUGIN_PROM_SUBSCRIBE_TARGETS = "prom_subscribe_targets"; +const std::string METRIC_PLUGIN_PROM_SUBSCRIBE_TOTAL = "prom_subscribe_total"; +const std::string METRIC_PLUGIN_PROM_SUBSCRIBE_TIME_MS = "prom_subscribe_time_ms"; +const std::string METRIC_PLUGIN_PROM_SCRAPE_TIME_MS = "prom_scrape_time_ms"; +const std::string METRIC_PLUGIN_PROM_SCRAPE_DELAY_TOTAL = "prom_scrape_delay_total"; /********************************************************** * input_ebpf @@ -92,69 +90,68 @@ const string METRIC_LABEL_VALUE_PLUGIN_TYPE_FILE_SECURITY = 
"file_security"; const string METRIC_LABEL_VALUE_PLUGIN_TYPE_PROCESS_OBSERVER = "process_observer"; const string METRIC_LABEL_VALUE_PLUGIN_TYPE_PROCESS_SECURITY = "process_security"; -const string METRIC_PLUGIN_EBPF_LOSS_KERNEL_EVENTS_TOTAL = "plugin_ebpf_loss_kernel_events_total"; -const string METRIC_PLUGIN_EBPF_NETWORK_OBSERVER_CONNTRACKER_NUM = "plugin_network_observer_conntracker_num"; -const string METRIC_PLUGIN_EBPF_NETWORK_OBSERVER_WORKER_HANDLE_EVENTS_TOTAL = "plugin_network_observer_worker_handle_events_total"; -const string METRIC_PLUGIN_EBPF_NETWORK_OBSERVER_PROTOCOL_PARSE_RECORDS_TOTAL = "plugin_network_observer_parse_records_total"; -const string METRIC_PLUGIN_EBPF_NETWORK_OBSERVER_AGGREGATE_EVENTS_TOTAL = "plugin_network_observer_aggregate_events_total"; -const string METRIC_PLUGIN_EBPF_NETWORK_OBSERVER_AGGREGATE_KEY_NUM = "plugin_network_observer_aggregate_key_num"; -const string METRIC_PLUGIN_EBPF_PROCESS_CACHE_ENTRIES_NUM = "plugin_process_cache_entries_num"; -const string METRIC_PLUGIN_EBPF_PROCESS_CACHE_MISS_TOTAL = "plugin_process_cache_miss_total"; +const string METRIC_PLUGIN_EBPF_LOSS_KERNEL_EVENTS_TOTAL = "ebpf_loss_kernel_events_total"; +const string METRIC_PLUGIN_EBPF_NETWORK_OBSERVER_CONNTRACKER_NUM = "conntracker_num"; +const string METRIC_PLUGIN_EBPF_NETWORK_OBSERVER_WORKER_HANDLE_EVENTS_TOTAL = "handle_events_total"; +const string METRIC_PLUGIN_EBPF_NETWORK_OBSERVER_PROTOCOL_PARSE_RECORDS_TOTAL = "parse_records_total"; +const string METRIC_PLUGIN_EBPF_NETWORK_OBSERVER_AGGREGATE_EVENTS_TOTAL = "aggregate_events_total"; +const string METRIC_PLUGIN_EBPF_NETWORK_OBSERVER_AGGREGATE_KEY_NUM = "aggregate_key_num"; +const string METRIC_PLUGIN_EBPF_PROCESS_CACHE_ENTRIES_NUM = "process_cache_entries_num"; +const string METRIC_PLUGIN_EBPF_PROCESS_CACHE_MISS_TOTAL = "process_cache_miss_total"; /********************************************************** * all processor (所有解析类的处理插件通用指标。Todo:目前统计还不全、不准确) 
**********************************************************/ -const string METRIC_PLUGIN_DISCARDED_EVENTS_TOTAL = "plugin_discarded_events_total"; -const string METRIC_PLUGIN_OUT_FAILED_EVENTS_TOTAL = "plugin_out_failed_events_total"; -const string METRIC_PLUGIN_OUT_KEY_NOT_FOUND_EVENTS_TOTAL = "plugin_out_key_not_found_events_total"; -const string METRIC_PLUGIN_OUT_SUCCESSFUL_EVENTS_TOTAL = "plugin_out_successful_events_total"; +const string& METRIC_PLUGIN_DISCARDED_EVENTS_TOTAL = METRIC_DISCARDED_EVENTS_TOTAL; +const string METRIC_PLUGIN_OUT_FAILED_EVENTS_TOTAL = "out_failed_events_total"; +const string METRIC_PLUGIN_OUT_KEY_NOT_FOUND_EVENTS_TOTAL = "out_key_not_found_events_total"; +const string METRIC_PLUGIN_OUT_SUCCESSFUL_EVENTS_TOTAL = "out_successful_events_total"; /********************************************************** * processor_parse_apsara_native * processor_parse_timestamp_native **********************************************************/ -const string METRIC_PLUGIN_HISTORY_FAILURE_TOTAL = "plugin_history_failure_total"; +const string METRIC_PLUGIN_HISTORY_FAILURE_TOTAL = "history_failure_total"; /********************************************************** * processor_split_multiline_log_string_native **********************************************************/ -const string METRIC_PLUGIN_MATCHED_EVENTS_TOTAL = "plugin_matched_events_total"; -const string METRIC_PLUGIN_MATCHED_LINES_TOTAL = "plugin_matched_lines_total"; -const string METRIC_PLUGIN_UNMATCHED_LINES_TOTAL = "plugin_unmatched_lines_total"; +const string METRIC_PLUGIN_MATCHED_EVENTS_TOTAL = "matched_events_total"; +const string METRIC_PLUGIN_MATCHED_LINES_TOTAL = "matched_lines_total"; +const string METRIC_PLUGIN_UNMATCHED_LINES_TOTAL = "unmatched_lines_total"; /********************************************************** * processor_merge_multiline_log_native **********************************************************/ -const string METRIC_PLUGIN_MERGED_EVENTS_TOTAL = 
"plugin_merged_events_total"; -const string METRIC_PLUGIN_UNMATCHED_EVENTS_TOTAL = "plugin_unmatched_events_total"; +const string METRIC_PLUGIN_MERGED_EVENTS_TOTAL = "merged_events_total"; +const string METRIC_PLUGIN_UNMATCHED_EVENTS_TOTAL = "unmatched_events_total"; /********************************************************** * processor_parse_container_log_native **********************************************************/ -const string METRIC_PLUGIN_PARSE_STDERR_TOTAL = "plugin_parse_stderr_total"; -const string METRIC_PLUGIN_PARSE_STDOUT_TOTAL = "plugin_parse_stdout_total"; +const string METRIC_PLUGIN_PARSE_STDERR_TOTAL = "parse_stderr_total"; +const string METRIC_PLUGIN_PARSE_STDOUT_TOTAL = "parse_stdout_total"; /********************************************************** * all flusher (所有发送插件通用指标) **********************************************************/ -const string METRIC_PLUGIN_FLUSHER_TOTAL_PACKAGE_TIME_MS = "plugin_flusher_total_package_time_ms"; -const string METRIC_PLUGIN_FLUSHER_OUT_EVENT_GROUPS_TOTAL = "plugin_flusher_send_total"; -const string METRIC_PLUGIN_FLUSHER_SEND_DONE_TOTAL = "plugin_flusher_send_done_total"; -const string METRIC_PLUGIN_FLUSHER_SUCCESS_TOTAL = "plugin_flusher_success_total"; -const string METRIC_PLUGIN_FLUSHER_NETWORK_ERROR_TOTAL = "plugin_flusher_network_error_total"; -const string METRIC_PLUGIN_FLUSHER_SERVER_ERROR_TOTAL = "plugin_flusher_server_error_total"; -const string METRIC_PLUGIN_FLUSHER_UNAUTH_ERROR_TOTAL = "plugin_flusher_unauth_error_total"; -const string METRIC_PLUGIN_FLUSHER_PARAMS_ERROR_TOTAL = "plugin_flusher_params_error_total"; -const string METRIC_PLUGIN_FLUSHER_OTHER_ERROR_TOTAL = "plugin_flusher_other_error_total"; +const string METRIC_PLUGIN_FLUSHER_TOTAL_PACKAGE_TIME_MS = "total_package_time_ms"; +const string METRIC_PLUGIN_FLUSHER_OUT_EVENT_GROUPS_TOTAL = "send_total"; +const string METRIC_PLUGIN_FLUSHER_SEND_DONE_TOTAL = "send_done_total"; +const string METRIC_PLUGIN_FLUSHER_SUCCESS_TOTAL = 
"success_total"; +const string METRIC_PLUGIN_FLUSHER_NETWORK_ERROR_TOTAL = "network_error_total"; +const string METRIC_PLUGIN_FLUSHER_SERVER_ERROR_TOTAL = "server_error_total"; +const string METRIC_PLUGIN_FLUSHER_UNAUTH_ERROR_TOTAL = "unauth_error_total"; +const string METRIC_PLUGIN_FLUSHER_PARAMS_ERROR_TOTAL = "params_error_total"; +const string METRIC_PLUGIN_FLUSHER_OTHER_ERROR_TOTAL = "other_error_total"; /********************************************************** * flusher_sls **********************************************************/ -const string METRIC_PLUGIN_FLUSHER_SLS_SHARD_WRITE_QUOTA_ERROR_TOTAL - = "plugin_flusher_sls_shard_write_quota_error_total"; -const string METRIC_PLUGIN_FLUSHER_SLS_PROJECT_QUOTA_ERROR_TOTAL = "plugin_flusher_sls_project_quota_error_total"; -const string METRIC_PLUGIN_FLUSHER_SLS_SEQUENCE_ID_ERROR_TOTAL = "plugin_flusher_sls_sequence_id_error_total"; -const string METRIC_PLUGIN_FLUSHER_SLS_REQUEST_EXPRIRED_ERROR_TOTAL = "plugin_flusher_sls_request_exprired_error_total"; +const string METRIC_PLUGIN_FLUSHER_SLS_SHARD_WRITE_QUOTA_ERROR_TOTAL = "shard_write_quota_error_total"; +const string METRIC_PLUGIN_FLUSHER_SLS_PROJECT_QUOTA_ERROR_TOTAL = "project_quota_error_total"; +const string METRIC_PLUGIN_FLUSHER_SLS_SEQUENCE_ID_ERROR_TOTAL = "sequence_id_error_total"; +const string METRIC_PLUGIN_FLUSHER_SLS_REQUEST_EXPRIRED_ERROR_TOTAL = "request_exprired_error_total"; } // namespace logtail \ No newline at end of file diff --git a/core/monitor/metric_constants/RunnerMetrics.cpp b/core/monitor/metric_constants/RunnerMetrics.cpp index 8fb8519167..c1aead8643 100644 --- a/core/monitor/metric_constants/RunnerMetrics.cpp +++ b/core/monitor/metric_constants/RunnerMetrics.cpp @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
+#include "MetricCommonConstants.h" #include "MetricConstants.h" using namespace std; @@ -20,9 +21,9 @@ namespace logtail { // label keys const string METRIC_LABEL_KEY_RUNNER_NAME = "runner_name"; +const string METRIC_LABEL_KEY_THREAD_NO = "thread_no"; // label values -const string METRIC_LABEL_KEY_METRIC_CATEGORY_RUNNER = "runner"; const string METRIC_LABEL_VALUE_RUNNER_NAME_FILE_SERVER = "file_server"; const string METRIC_LABEL_VALUE_RUNNER_NAME_FLUSHER = "flusher_runner"; const string METRIC_LABEL_VALUE_RUNNER_NAME_HTTP_SINK = "http_sink"; @@ -31,49 +32,48 @@ const string METRIC_LABEL_VALUE_RUNNER_NAME_PROMETHEUS = "prometheus_runner"; const string METRIC_LABEL_VALUE_RUNNER_NAME_EBPF_SERVER = "ebpf_server"; // metric keys -const string METRIC_RUNNER_IN_EVENTS_TOTAL = "runner_in_events_total"; -const string METRIC_RUNNER_IN_EVENT_GROUPS_TOTAL = "runner_in_event_groups_total"; -const string METRIC_RUNNER_IN_SIZE_BYTES = "runner_in_size_bytes"; -const string METRIC_RUNNER_IN_ITEMS_TOTAL = "runner_in_items_total"; -const string METRIC_RUNNER_LAST_RUN_TIME = "runner_last_run_time"; -const string METRIC_RUNNER_OUT_ITEMS_TOTAL = "runner_out_items_total"; -const string METRIC_RUNNER_TOTAL_DELAY_MS = "runner_total_delay_ms"; -const string METRIC_RUNNER_CLIENT_REGISTER_STATE = "runner_client_register_state"; -const string METRIC_RUNNER_CLIENT_REGISTER_RETRY_TOTAL = "runner_client_register_retry_total"; -const string METRIC_RUNNER_JOBS_TOTAL = "runner_jobs_total"; +const string& METRIC_RUNNER_IN_EVENTS_TOTAL = METRIC_IN_EVENTS_TOTAL; +const string& METRIC_RUNNER_IN_EVENT_GROUPS_TOTAL = METRIC_IN_EVENT_GROUPS_TOTAL; +const string& METRIC_RUNNER_IN_SIZE_BYTES = METRIC_IN_SIZE_BYTES; +const string& METRIC_RUNNER_IN_ITEMS_TOTAL = METRIC_IN_ITEMS_TOTAL; +const string METRIC_RUNNER_LAST_RUN_TIME = "last_run_time"; +const string& METRIC_RUNNER_OUT_ITEMS_TOTAL = METRIC_OUT_ITEMS_TOTAL; +const string& METRIC_RUNNER_TOTAL_DELAY_MS = METRIC_TOTAL_DELAY_MS; +const string 
METRIC_RUNNER_CLIENT_REGISTER_STATE = "client_register_state"; +const string METRIC_RUNNER_CLIENT_REGISTER_RETRY_TOTAL = "client_register_retry_total"; +const string METRIC_RUNNER_JOBS_TOTAL = "jobs_total"; /********************************************************** * all sinks **********************************************************/ -const string METRIC_RUNNER_SINK_OUT_SUCCESSFUL_ITEMS_TOTAL = "runner_out_successful_items_total"; -const string METRIC_RUNNER_SINK_OUT_FAILED_ITEMS_TOTAL = "runner_out_failed_items_total"; -const string METRIC_RUNNER_SINK_SUCCESSFUL_ITEM_TOTAL_RESPONSE_TIME_MS = "runner_successful_item_total_response_time_ms"; -const string METRIC_RUNNER_SINK_FAILED_ITEM_TOTAL_RESPONSE_TIME_MS = "runner_failed_item_total_response_time_ms"; -const string METRIC_RUNNER_SINK_SENDING_ITEMS_TOTAL = "runner_sending_items_total"; -const string METRIC_RUNNER_SINK_SEND_CONCURRENCY = "runner_send_concurrency"; +const string METRIC_RUNNER_SINK_OUT_SUCCESSFUL_ITEMS_TOTAL = "out_successful_items_total"; +const string METRIC_RUNNER_SINK_OUT_FAILED_ITEMS_TOTAL = "out_failed_items_total"; +const string METRIC_RUNNER_SINK_SUCCESSFUL_ITEM_TOTAL_RESPONSE_TIME_MS = "successful_response_time_ms"; +const string METRIC_RUNNER_SINK_FAILED_ITEM_TOTAL_RESPONSE_TIME_MS = "failed_response_time_ms"; +const string METRIC_RUNNER_SINK_SENDING_ITEMS_TOTAL = "sending_items_total"; +const string METRIC_RUNNER_SINK_SEND_CONCURRENCY = "send_concurrency"; /********************************************************** * flusher runner **********************************************************/ -const string METRIC_RUNNER_FLUSHER_IN_RAW_SIZE_BYTES = "runner_in_raw_size_bytes"; -const string METRIC_RUNNER_FLUSHER_WAITING_ITEMS_TOTAL = "runner_waiting_items_total"; +const string METRIC_RUNNER_FLUSHER_IN_RAW_SIZE_BYTES = "in_raw_size_bytes"; +const string METRIC_RUNNER_FLUSHER_WAITING_ITEMS_TOTAL = "waiting_items_total"; /********************************************************** * file server 
**********************************************************/ -const string METRIC_RUNNER_FILE_WATCHED_DIRS_TOTAL = "runner_watched_dirs_total"; -const string METRIC_RUNNER_FILE_ACTIVE_READERS_TOTAL = "runner_active_readers_total"; -const string METRIC_RUNNER_FILE_ENABLE_FILE_INCLUDED_BY_MULTI_CONFIGS_FLAG - = "runner_enable_file_included_by_multi_configs"; -const string METRIC_RUNNER_FILE_POLLING_MODIFY_CACHE_SIZE = "runner_polling_modify_cache_size"; -const string METRIC_RUNNER_FILE_POLLING_DIR_CACHE_SIZE = "runner_polling_dir_cache_size"; -const string METRIC_RUNNER_FILE_POLLING_FILE_CACHE_SIZE = "runner_polling_file_cache_size"; +const string METRIC_RUNNER_FILE_WATCHED_DIRS_TOTAL = "watched_dirs_total"; +const string METRIC_RUNNER_FILE_ACTIVE_READERS_TOTAL = "active_readers_total"; +const string METRIC_RUNNER_FILE_ENABLE_FILE_INCLUDED_BY_MULTI_CONFIGS_FLAG = "enable_multi_configs"; +const string METRIC_RUNNER_FILE_POLLING_MODIFY_CACHE_SIZE = "polling_modify_cache_size"; +const string METRIC_RUNNER_FILE_POLLING_DIR_CACHE_SIZE = "polling_dir_cache_size"; +const string METRIC_RUNNER_FILE_POLLING_FILE_CACHE_SIZE = "polling_file_cache_size"; /********************************************************** * ebpf server **********************************************************/ -const string METRIC_RUNNER_EBPF_START_PLUGIN_TOTAL = "runner_start_plugin_total"; -const string METRIC_RUNNER_EBPF_STOP_PLUGIN_TOTAL = "runner_stop_plugin_total"; -const string METRIC_RUNNER_EBPF_SUSPEND_PLUGIN_TOTAL = "runner_suspend_plugin_total"; +const string METRIC_RUNNER_EBPF_START_PLUGIN_TOTAL = "start_plugin_total"; +const string METRIC_RUNNER_EBPF_STOP_PLUGIN_TOTAL = "stop_plugin_total"; +const string METRIC_RUNNER_EBPF_SUSPEND_PLUGIN_TOTAL = "suspend_plugin_total"; } // namespace logtail \ No newline at end of file diff --git a/core/pipeline/Pipeline.cpp b/core/pipeline/Pipeline.cpp index 40dec54bd6..d80ebc5064 100644 --- a/core/pipeline/Pipeline.cpp +++ b/core/pipeline/Pipeline.cpp @@ 
-318,9 +318,8 @@ bool Pipeline::Init(PipelineConfig&& config) { WriteMetrics::GetInstance()->PrepareMetricsRecordRef( mMetricsRecordRef, - {{METRIC_LABEL_KEY_PROJECT, mContext.GetProjectName()}, - {METRIC_LABEL_KEY_PIPELINE_NAME, mName}, - {METRIC_LABEL_KEY_METRIC_CATEGORY, METRIC_LABEL_KEY_METRIC_CATEGORY_PIPELINE}}); + MetricCategory::METRIC_CATEGORY_PIPELINE, + {{METRIC_LABEL_KEY_PROJECT, mContext.GetProjectName()}, {METRIC_LABEL_KEY_PIPELINE_NAME, mName}}); mStartTime = mMetricsRecordRef.CreateIntGauge(METRIC_PIPELINE_START_TIME); mProcessorsInEventsTotal = mMetricsRecordRef.CreateCounter(METRIC_PIPELINE_PROCESSORS_IN_EVENTS_TOTAL); mProcessorsInGroupsTotal = mMetricsRecordRef.CreateCounter(METRIC_PIPELINE_PROCESSORS_IN_EVENT_GROUPS_TOTAL); diff --git a/core/pipeline/batch/Batcher.h b/core/pipeline/batch/Batcher.h index aeca80b600..2b1898fd55 100644 --- a/core/pipeline/batch/Batcher.h +++ b/core/pipeline/batch/Batcher.h @@ -104,14 +104,14 @@ class Batcher { {METRIC_LABEL_KEY_PROJECT, ctx.GetProjectName()}, {METRIC_LABEL_KEY_PIPELINE_NAME, ctx.GetConfigName()}, {METRIC_LABEL_KEY_COMPONENT_NAME, METRIC_LABEL_VALUE_COMPONENT_NAME_BATCHER}, - {METRIC_LABEL_KEY_METRIC_CATEGORY, METRIC_LABEL_KEY_METRIC_CATEGORY_COMPONENT}, {METRIC_LABEL_KEY_FLUSHER_PLUGIN_ID, flusher->GetPluginID()}}; if (enableGroupBatch) { labels.emplace_back(METRIC_LABEL_KEY_GROUP_BATCH_ENABLED, "true"); } else { labels.emplace_back(METRIC_LABEL_KEY_GROUP_BATCH_ENABLED, "false"); } - WriteMetrics::GetInstance()->PrepareMetricsRecordRef(mMetricsRecordRef, std::move(labels)); + WriteMetrics::GetInstance()->PrepareMetricsRecordRef( + mMetricsRecordRef, MetricCategory::METRIC_CATEGORY_COMPONENT, std::move(labels)); mInEventsTotal = mMetricsRecordRef.CreateCounter(METRIC_COMPONENT_IN_EVENTS_TOTAL); mInGroupDataSizeBytes = mMetricsRecordRef.CreateCounter(METRIC_COMPONENT_IN_SIZE_BYTES); mOutEventsTotal = mMetricsRecordRef.CreateCounter(METRIC_COMPONENT_OUT_EVENTS_TOTAL); diff --git 
a/core/pipeline/plugin/interface/Plugin.h b/core/pipeline/plugin/interface/Plugin.h index 087a44cd41..1dbd872756 100644 --- a/core/pipeline/plugin/interface/Plugin.h +++ b/core/pipeline/plugin/interface/Plugin.h @@ -39,9 +39,9 @@ class Plugin { void SetMetricsRecordRef(const std::string& name, const std::string& id) { WriteMetrics::GetInstance()->PrepareMetricsRecordRef( mMetricsRecordRef, + MetricCategory::METRIC_CATEGORY_PLUGIN, {{METRIC_LABEL_KEY_PROJECT, mContext->GetProjectName()}, {METRIC_LABEL_KEY_PIPELINE_NAME, mContext->GetConfigName()}, - {METRIC_LABEL_KEY_METRIC_CATEGORY, METRIC_LABEL_KEY_METRIC_CATEGORY_PLUGIN}, {METRIC_LABEL_KEY_PLUGIN_TYPE, name}, {METRIC_LABEL_KEY_PLUGIN_ID, id}}); } diff --git a/core/pipeline/queue/QueueInterface.h b/core/pipeline/queue/QueueInterface.h index e7cf3a37a1..b0ea6d0e47 100644 --- a/core/pipeline/queue/QueueInterface.h +++ b/core/pipeline/queue/QueueInterface.h @@ -28,10 +28,10 @@ class QueueInterface { public: QueueInterface(QueueKey key, size_t cap, const PipelineContext& ctx) : mKey(key), mCapacity(cap) { WriteMetrics::GetInstance()->CreateMetricsRecordRef(mMetricsRecordRef, + MetricCategory::METRIC_CATEGORY_COMPONENT, { {METRIC_LABEL_KEY_PROJECT, ctx.GetProjectName()}, {METRIC_LABEL_KEY_PIPELINE_NAME, ctx.GetConfigName()}, - {METRIC_LABEL_KEY_METRIC_CATEGORY, METRIC_LABEL_KEY_METRIC_CATEGORY_COMPONENT}, }); mInItemsTotal = mMetricsRecordRef.CreateCounter(METRIC_COMPONENT_IN_ITEMS_TOTAL); diff --git a/core/pipeline/route/Router.cpp b/core/pipeline/route/Router.cpp index 08abd08cca..521f4b0404 100644 --- a/core/pipeline/route/Router.cpp +++ b/core/pipeline/route/Router.cpp @@ -37,10 +37,10 @@ bool Router::Init(std::vector> configs, const P WriteMetrics::GetInstance()->PrepareMetricsRecordRef( mMetricsRecordRef, + MetricCategory::METRIC_CATEGORY_COMPONENT, {{METRIC_LABEL_KEY_PROJECT, ctx.GetProjectName()}, {METRIC_LABEL_KEY_PIPELINE_NAME, ctx.GetConfigName()}, - {METRIC_LABEL_KEY_COMPONENT_NAME, 
METRIC_LABEL_VALUE_COMPONENT_NAME_ROUTER}, - {METRIC_LABEL_KEY_METRIC_CATEGORY, METRIC_LABEL_KEY_METRIC_CATEGORY_COMPONENT}}); + {METRIC_LABEL_KEY_COMPONENT_NAME, METRIC_LABEL_VALUE_COMPONENT_NAME_ROUTER}}); mInEventsTotal = mMetricsRecordRef.CreateCounter(METRIC_COMPONENT_IN_EVENTS_TOTAL); mInGroupDataSizeBytes = mMetricsRecordRef.CreateCounter(METRIC_COMPONENT_IN_SIZE_BYTES); return true; diff --git a/core/pipeline/serializer/Serializer.h b/core/pipeline/serializer/Serializer.h index a9a99c737e..27cc2e0847 100644 --- a/core/pipeline/serializer/Serializer.h +++ b/core/pipeline/serializer/Serializer.h @@ -49,10 +49,10 @@ class Serializer { Serializer(Flusher* f) : mFlusher(f) { WriteMetrics::GetInstance()->PrepareMetricsRecordRef( mMetricsRecordRef, + MetricCategory::METRIC_CATEGORY_COMPONENT, {{METRIC_LABEL_KEY_PROJECT, f->GetContext().GetProjectName()}, {METRIC_LABEL_KEY_PIPELINE_NAME, f->GetContext().GetConfigName()}, {METRIC_LABEL_KEY_COMPONENT_NAME, METRIC_LABEL_VALUE_COMPONENT_NAME_SERIALIZER}, - {METRIC_LABEL_KEY_METRIC_CATEGORY, METRIC_LABEL_KEY_METRIC_CATEGORY_COMPONENT}, {METRIC_LABEL_KEY_FLUSHER_PLUGIN_ID, f->GetPluginID()}}); mInItemsTotal = mMetricsRecordRef.CreateCounter(METRIC_COMPONENT_IN_ITEMS_TOTAL); mInItemSizeBytes = mMetricsRecordRef.CreateCounter(METRIC_COMPONENT_IN_SIZE_BYTES); @@ -60,7 +60,7 @@ class Serializer { mOutItemSizeBytes = mMetricsRecordRef.CreateCounter(METRIC_COMPONENT_OUT_SIZE_BYTES); mTotalProcessMs = mMetricsRecordRef.CreateTimeCounter(METRIC_COMPONENT_TOTAL_PROCESS_TIME_MS); mDiscardedItemsTotal = mMetricsRecordRef.CreateCounter(METRIC_COMPONENT_DISCARDED_ITEMS_TOTAL); - mDiscardedItemSizeBytes = mMetricsRecordRef.CreateCounter(METRIC_COMPONENT_DISCARDED_ITEMS_SIZE_BYTES); + mDiscardedItemSizeBytes = mMetricsRecordRef.CreateCounter(METRIC_COMPONENT_DISCARDED_SIZE_BYTES); } virtual ~Serializer() = default; diff --git a/core/plugin/input/InputContainerStdio.cpp b/core/plugin/input/InputContainerStdio.cpp index 
46a018e067..6c3f5d49a4 100644 --- a/core/plugin/input/InputContainerStdio.cpp +++ b/core/plugin/input/InputContainerStdio.cpp @@ -69,7 +69,8 @@ bool InputContainerStdio::Init(const Json::Value& config, Json::Value& optionalG if (!mContainerDiscovery.Init(config, *mContext, sName)) { return false; } - mContainerDiscovery.GenerateContainerMetaFetchingGoPipeline(optionalGoPipeline, nullptr, mContext->GetPipeline().GenNextPluginMeta(false)); + mContainerDiscovery.GenerateContainerMetaFetchingGoPipeline( + optionalGoPipeline, nullptr, mContext->GetPipeline().GenNextPluginMeta(false)); if (!mFileReader.Init(config, *mContext, sName)) { return false; @@ -167,8 +168,8 @@ bool InputContainerStdio::Init(const Json::Value& config, Json::Value& optionalG {METRIC_PLUGIN_SOURCE_SIZE_BYTES, MetricType::METRIC_TYPE_INT_GAUGE}, {METRIC_PLUGIN_SOURCE_READ_OFFSET_BYTES, MetricType::METRIC_TYPE_INT_GAUGE}, }; - mPluginMetricManager - = std::make_shared(GetMetricsRecordRef()->GetLabels(), inputFileMetricKeys); + mPluginMetricManager = std::make_shared( + GetMetricsRecordRef()->GetLabels(), inputFileMetricKeys, MetricCategory::METRIC_CATEGORY_PLUGIN_SOURCE); // Register a Gauge metric to record PluginMetricManager‘s map size mMonitorFileTotal = GetMetricsRecordRef().CreateIntGauge(METRIC_PLUGIN_MONITOR_FILE_TOTAL); mPluginMetricManager->RegisterSizeGauge(mMonitorFileTotal); diff --git a/core/plugin/input/InputFile.cpp b/core/plugin/input/InputFile.cpp index c5b0039a19..b8186797b8 100644 --- a/core/plugin/input/InputFile.cpp +++ b/core/plugin/input/InputFile.cpp @@ -165,8 +165,8 @@ bool InputFile::Init(const Json::Value& config, Json::Value& optionalGoPipeline) {METRIC_PLUGIN_SOURCE_SIZE_BYTES, MetricType::METRIC_TYPE_INT_GAUGE}, {METRIC_PLUGIN_SOURCE_READ_OFFSET_BYTES, MetricType::METRIC_TYPE_INT_GAUGE}, }; - mPluginMetricManager - = std::make_shared(GetMetricsRecordRef()->GetLabels(), inputFileMetricKeys); + mPluginMetricManager = std::make_shared( + GetMetricsRecordRef()->GetLabels(), 
inputFileMetricKeys, MetricCategory::METRIC_CATEGORY_PLUGIN_SOURCE); mPluginMetricManager->RegisterSizeGauge(mMonitorFileTotal); return CreateInnerProcessors(); diff --git a/core/plugin/input/InputFileSecurity.cpp b/core/plugin/input/InputFileSecurity.cpp index 3510833830..5c15536721 100644 --- a/core/plugin/input/InputFileSecurity.cpp +++ b/core/plugin/input/InputFileSecurity.cpp @@ -43,7 +43,7 @@ bool InputFileSecurity::Init(const Json::Value& config, Json::Value& optionalGoP {METRIC_PLUGIN_EBPF_PROCESS_CACHE_MISS_TOTAL, MetricType::METRIC_TYPE_COUNTER}, }; - mPluginMgr = std::make_shared(GetMetricsRecordRef().GetLabels(), metricKeys); + mPluginMgr = std::make_shared(GetMetricsRecordRef().GetLabels(), metricKeys, MetricCategory::METRIC_CATEGORY_PLUGIN_SOURCE); return mSecurityOptions.Init(ebpf::SecurityProbeType::FILE, config, mContext, sName); } diff --git a/core/plugin/input/InputFileSecurity.h b/core/plugin/input/InputFileSecurity.h index e096e585f9..fea0b459fc 100644 --- a/core/plugin/input/InputFileSecurity.h +++ b/core/plugin/input/InputFileSecurity.h @@ -36,7 +36,7 @@ class InputFileSecurity : public Input { bool SupportAck() const override { return false; } ebpf::SecurityOptions mSecurityOptions; - std::shared_ptr mPluginMgr; + PluginMetricManagerPtr mPluginMgr; }; } // namespace logtail diff --git a/core/plugin/input/InputNetworkObserver.cpp b/core/plugin/input/InputNetworkObserver.cpp index c2b997d54f..ce9c4218ff 100644 --- a/core/plugin/input/InputNetworkObserver.cpp +++ b/core/plugin/input/InputNetworkObserver.cpp @@ -49,7 +49,7 @@ bool InputNetworkObserver::Init(const Json::Value& config, Json::Value& optional {METRIC_PLUGIN_EBPF_NETWORK_OBSERVER_PROTOCOL_PARSE_RECORDS_TOTAL, MetricType::METRIC_TYPE_COUNTER}, }; - mPluginMgr = std::make_shared(GetMetricsRecordRef().GetLabels(), metricKeys); + mPluginMgr = std::make_shared(GetMetricsRecordRef().GetLabels(), metricKeys, MetricCategory::METRIC_CATEGORY_PLUGIN_SOURCE); return 
ebpf::InitObserverNetworkOption(config, mNetworkOption, mContext, sName); } diff --git a/core/plugin/input/InputNetworkObserver.h b/core/plugin/input/InputNetworkObserver.h index 7cc3d98ca6..7f204a2c90 100644 --- a/core/plugin/input/InputNetworkObserver.h +++ b/core/plugin/input/InputNetworkObserver.h @@ -36,7 +36,7 @@ class InputNetworkObserver : public Input { bool SupportAck() const override { return false; } nami::ObserverNetworkOption mNetworkOption; - std::shared_ptr mPluginMgr; + PluginMetricManagerPtr mPluginMgr; }; } // namespace logtail diff --git a/core/plugin/input/InputNetworkSecurity.cpp b/core/plugin/input/InputNetworkSecurity.cpp index 409d40f77c..ccadcc26ca 100644 --- a/core/plugin/input/InputNetworkSecurity.cpp +++ b/core/plugin/input/InputNetworkSecurity.cpp @@ -45,7 +45,7 @@ bool InputNetworkSecurity::Init(const Json::Value& config, Json::Value& optional {METRIC_PLUGIN_EBPF_PROCESS_CACHE_MISS_TOTAL, MetricType::METRIC_TYPE_COUNTER}, }; - mPluginMgr = std::make_shared(GetMetricsRecordRef().GetLabels(), metricKeys); + mPluginMgr = std::make_shared(GetMetricsRecordRef().GetLabels(), metricKeys, MetricCategory::METRIC_CATEGORY_PLUGIN_SOURCE); return mSecurityOptions.Init(ebpf::SecurityProbeType::NETWORK, config, mContext, sName); } diff --git a/core/plugin/input/InputNetworkSecurity.h b/core/plugin/input/InputNetworkSecurity.h index 20e3025246..cda3a7c170 100644 --- a/core/plugin/input/InputNetworkSecurity.h +++ b/core/plugin/input/InputNetworkSecurity.h @@ -35,7 +35,7 @@ class InputNetworkSecurity : public Input { bool SupportAck() const override { return false; } ebpf::SecurityOptions mSecurityOptions; - std::shared_ptr mPluginMgr; + PluginMetricManagerPtr mPluginMgr; }; } // namespace logtail diff --git a/core/plugin/input/InputProcessSecurity.cpp b/core/plugin/input/InputProcessSecurity.cpp index cc11afa854..3baa6cbed8 100644 --- a/core/plugin/input/InputProcessSecurity.cpp +++ b/core/plugin/input/InputProcessSecurity.cpp @@ -42,7 +42,7 @@ bool 
InputProcessSecurity::Init(const Json::Value& config, Json::Value& optional {METRIC_PLUGIN_EBPF_PROCESS_CACHE_MISS_TOTAL, MetricType::METRIC_TYPE_COUNTER}, }; - mPluginMgr = std::make_shared(GetMetricsRecordRef().GetLabels(), metricKeys); + mPluginMgr = std::make_shared(GetMetricsRecordRef().GetLabels(), metricKeys, MetricCategory::METRIC_CATEGORY_PLUGIN_SOURCE); return mSecurityOptions.Init(ebpf::SecurityProbeType::PROCESS, config, mContext, sName); } diff --git a/core/plugin/input/InputProcessSecurity.h b/core/plugin/input/InputProcessSecurity.h index b45fec9b5c..d26d7a95e3 100644 --- a/core/plugin/input/InputProcessSecurity.h +++ b/core/plugin/input/InputProcessSecurity.h @@ -35,7 +35,7 @@ class InputProcessSecurity : public Input { bool SupportAck() const override { return false; } ebpf::SecurityOptions mSecurityOptions; - std::shared_ptr mPluginMgr; + PluginMetricManagerPtr mPluginMgr; }; } // namespace logtail diff --git a/core/prometheus/PromSelfMonitor.cpp b/core/prometheus/PromSelfMonitor.cpp index 39c1473014..5852fc45ae 100644 --- a/core/prometheus/PromSelfMonitor.cpp +++ b/core/prometheus/PromSelfMonitor.cpp @@ -13,7 +13,7 @@ namespace logtail { void PromSelfMonitorUnsafe::InitMetricManager(const std::unordered_map& metricKeys, const MetricLabels& labels) { auto metricLabels = std::make_shared(labels); - mPluginMetricManagerPtr = std::make_shared(metricLabels, metricKeys); + mPluginMetricManagerPtr = std::make_shared(metricLabels, metricKeys, MetricCategory::METRIC_CATEGORY_PLUGIN_SOURCE); } void PromSelfMonitorUnsafe::AddCounter(const std::string& metricName, uint64_t statusCode, uint64_t val) { diff --git a/core/prometheus/PrometheusInputRunner.cpp b/core/prometheus/PrometheusInputRunner.cpp index c033b5c2d1..59cc58d4d5 100644 --- a/core/prometheus/PrometheusInputRunner.cpp +++ b/core/prometheus/PrometheusInputRunner.cpp @@ -53,7 +53,6 @@ PrometheusInputRunner::PrometheusInputRunner() // self monitor MetricLabels labels; - 
labels.emplace_back(METRIC_LABEL_KEY_METRIC_CATEGORY, METRIC_LABEL_KEY_METRIC_CATEGORY_RUNNER); labels.emplace_back(METRIC_LABEL_KEY_RUNNER_NAME, METRIC_LABEL_VALUE_RUNNER_NAME_PROMETHEUS); labels.emplace_back(METRIC_LABEL_KEY_INSTANCE_ID, Application::GetInstance()->GetInstanceId()); labels.emplace_back(METRIC_LABEL_KEY_POD_NAME, mPodName); @@ -64,7 +63,7 @@ PrometheusInputRunner::PrometheusInputRunner() dynamicLabels.emplace_back(METRIC_LABEL_KEY_PROJECT, [this]() -> std::string { return this->GetAllProjects(); }); WriteMetrics::GetInstance()->PrepareMetricsRecordRef( - mMetricsRecordRef, std::move(labels), std::move(dynamicLabels)); + mMetricsRecordRef, MetricCategory::METRIC_CATEGORY_RUNNER, std::move(labels), std::move(dynamicLabels)); mPromRegisterState = mMetricsRecordRef.CreateIntGauge(METRIC_RUNNER_CLIENT_REGISTER_STATE); mPromJobNum = mMetricsRecordRef.CreateIntGauge(METRIC_RUNNER_JOBS_TOTAL); diff --git a/core/prometheus/schedulers/ScrapeScheduler.cpp b/core/prometheus/schedulers/ScrapeScheduler.cpp index 33ef0886ad..0908cb6ea7 100644 --- a/core/prometheus/schedulers/ScrapeScheduler.cpp +++ b/core/prometheus/schedulers/ScrapeScheduler.cpp @@ -221,7 +221,7 @@ void ScrapeScheduler::InitSelfMonitor(const MetricLabels& defaultLabels) { mSelfMonitor->InitMetricManager(sScrapeMetricKeys, labels); - WriteMetrics::GetInstance()->PrepareMetricsRecordRef(mMetricsRecordRef, std::move(labels)); + WriteMetrics::GetInstance()->PrepareMetricsRecordRef(mMetricsRecordRef, MetricCategory::METRIC_CATEGORY_PLUGIN_SOURCE, std::move(labels)); mPromDelayTotal = mMetricsRecordRef.CreateCounter(METRIC_PLUGIN_PROM_SCRAPE_DELAY_TOTAL); mPluginTotalDelayMs = mMetricsRecordRef.CreateCounter(METRIC_PLUGIN_TOTAL_DELAY_MS); } diff --git a/core/prometheus/schedulers/TargetSubscriberScheduler.cpp b/core/prometheus/schedulers/TargetSubscriberScheduler.cpp index 96de4f8ee6..d44948e469 100644 --- a/core/prometheus/schedulers/TargetSubscriberScheduler.cpp +++ 
b/core/prometheus/schedulers/TargetSubscriberScheduler.cpp @@ -334,7 +334,7 @@ void TargetSubscriberScheduler::InitSelfMonitor(const MetricLabels& defaultLabel mSelfMonitor = std::make_shared(); mSelfMonitor->InitMetricManager(sSubscriberMetricKeys, mDefaultLabels); - WriteMetrics::GetInstance()->PrepareMetricsRecordRef(mMetricsRecordRef, std::move(mDefaultLabels)); + WriteMetrics::GetInstance()->PrepareMetricsRecordRef(mMetricsRecordRef, MetricCategory::METRIC_CATEGORY_PLUGIN_SOURCE, std::move(mDefaultLabels)); mPromSubscriberTargets = mMetricsRecordRef.CreateIntGauge(METRIC_PLUGIN_PROM_SUBSCRIBE_TARGETS); mTotalDelayMs = mMetricsRecordRef.CreateCounter(METRIC_PLUGIN_TOTAL_DELAY_MS); } diff --git a/core/runner/FlusherRunner.cpp b/core/runner/FlusherRunner.cpp index 095241c6ef..c98acd7bf2 100644 --- a/core/runner/FlusherRunner.cpp +++ b/core/runner/FlusherRunner.cpp @@ -45,8 +45,8 @@ bool FlusherRunner::Init() { WriteMetrics::GetInstance()->PrepareMetricsRecordRef( mMetricsRecordRef, - {{METRIC_LABEL_KEY_RUNNER_NAME, METRIC_LABEL_VALUE_RUNNER_NAME_FLUSHER}, - {METRIC_LABEL_KEY_METRIC_CATEGORY, METRIC_LABEL_KEY_METRIC_CATEGORY_RUNNER}}); + MetricCategory::METRIC_CATEGORY_RUNNER, + {{METRIC_LABEL_KEY_RUNNER_NAME, METRIC_LABEL_VALUE_RUNNER_NAME_FLUSHER}}); mInItemsTotal = mMetricsRecordRef.CreateCounter(METRIC_RUNNER_IN_ITEMS_TOTAL); mInItemDataSizeBytes = mMetricsRecordRef.CreateCounter(METRIC_RUNNER_IN_SIZE_BYTES); mOutItemsTotal = mMetricsRecordRef.CreateCounter(METRIC_RUNNER_OUT_ITEMS_TOTAL); diff --git a/core/runner/ProcessorRunner.cpp b/core/runner/ProcessorRunner.cpp index 98b604747a..7a0963d542 100644 --- a/core/runner/ProcessorRunner.cpp +++ b/core/runner/ProcessorRunner.cpp @@ -89,9 +89,9 @@ void ProcessorRunner::Run(uint32_t threadNo) { // thread local metrics should be initialized in each thread WriteMetrics::GetInstance()->PrepareMetricsRecordRef( sMetricsRecordRef, + MetricCategory::METRIC_CATEGORY_RUNNER, {{METRIC_LABEL_KEY_RUNNER_NAME, 
METRIC_LABEL_VALUE_RUNNER_NAME_PROCESSOR}, - {METRIC_LABEL_KEY_METRIC_CATEGORY, METRIC_LABEL_KEY_METRIC_CATEGORY_RUNNER}, - {"thread_no", ToString(threadNo)}}); + {METRIC_LABEL_KEY_THREAD_NO, ToString(threadNo)}}); sInGroupsCnt = sMetricsRecordRef.CreateCounter(METRIC_RUNNER_IN_EVENT_GROUPS_TOTAL); sInEventsCnt = sMetricsRecordRef.CreateCounter(METRIC_RUNNER_IN_EVENTS_TOTAL); sInGroupDataSizeBytes = sMetricsRecordRef.CreateCounter(METRIC_RUNNER_IN_SIZE_BYTES); @@ -138,7 +138,7 @@ void ProcessorRunner::Run(uint32_t threadNo) { pipeline->Process(eventGroupList, item->mInputIndex); if (pipeline->IsFlushingThroughGoPipeline()) { - // TODO: + // TODO: // 1. allow all event types to be sent to Go pipelines // 2. use event group protobuf instead if (isLog) { diff --git a/core/runner/sink/http/HttpSink.cpp b/core/runner/sink/http/HttpSink.cpp index 2969ee5bf9..4429ce40ec 100644 --- a/core/runner/sink/http/HttpSink.cpp +++ b/core/runner/sink/http/HttpSink.cpp @@ -40,8 +40,8 @@ bool HttpSink::Init() { WriteMetrics::GetInstance()->PrepareMetricsRecordRef( mMetricsRecordRef, - {{METRIC_LABEL_KEY_RUNNER_NAME, METRIC_LABEL_VALUE_RUNNER_NAME_HTTP_SINK}, - {METRIC_LABEL_KEY_METRIC_CATEGORY, METRIC_LABEL_KEY_METRIC_CATEGORY_RUNNER}}); + MetricCategory::METRIC_CATEGORY_RUNNER, + {{METRIC_LABEL_KEY_RUNNER_NAME, METRIC_LABEL_VALUE_RUNNER_NAME_HTTP_SINK}}); mInItemsTotal = mMetricsRecordRef.CreateCounter(METRIC_RUNNER_IN_ITEMS_TOTAL); mLastRunTime = mMetricsRecordRef.CreateIntGauge(METRIC_RUNNER_LAST_RUN_TIME); mOutSuccessfulItemsTotal = mMetricsRecordRef.CreateCounter(METRIC_RUNNER_SINK_OUT_SUCCESSFUL_ITEMS_TOTAL); diff --git a/core/unittest/batch/BatcherUnittest.cpp b/core/unittest/batch/BatcherUnittest.cpp index 5e105d6f90..20ae0e11fc 100644 --- a/core/unittest/batch/BatcherUnittest.cpp +++ b/core/unittest/batch/BatcherUnittest.cpp @@ -614,7 +614,7 @@ void BatcherUnittest::TestMetric() { vector res; batch.Add(std::move(g), res); - APSARA_TEST_EQUAL(6U, 
batch.mMetricsRecordRef->GetLabels()->size()); + APSARA_TEST_EQUAL(5U, batch.mMetricsRecordRef->GetLabels()->size()); APSARA_TEST_TRUE(batch.mMetricsRecordRef.HasLabel(METRIC_LABEL_KEY_PROJECT, "")); APSARA_TEST_TRUE(batch.mMetricsRecordRef.HasLabel(METRIC_LABEL_KEY_PIPELINE_NAME, "test_config")); APSARA_TEST_TRUE(batch.mMetricsRecordRef.HasLabel(METRIC_LABEL_KEY_COMPONENT_NAME, diff --git a/core/unittest/compression/CompressorFactoryUnittest.cpp b/core/unittest/compression/CompressorFactoryUnittest.cpp index 3a9aefb8eb..e9244c3904 100644 --- a/core/unittest/compression/CompressorFactoryUnittest.cpp +++ b/core/unittest/compression/CompressorFactoryUnittest.cpp @@ -95,7 +95,7 @@ void CompressorFactoryUnittest::TestCompressTypeToString() { void CompressorFactoryUnittest::TestMetric() { auto compressor = CompressorFactory::GetInstance()->Create(Json::Value(), mCtx, "test_plugin", mFlusherId, CompressType::LZ4); - APSARA_TEST_EQUAL(5U, compressor->mMetricsRecordRef->GetLabels()->size()); + APSARA_TEST_EQUAL(4U, compressor->mMetricsRecordRef->GetLabels()->size()); APSARA_TEST_TRUE(compressor->mMetricsRecordRef.HasLabel(METRIC_LABEL_KEY_PROJECT, "")); APSARA_TEST_TRUE(compressor->mMetricsRecordRef.HasLabel(METRIC_LABEL_KEY_PIPELINE_NAME, "test_config")); APSARA_TEST_TRUE(compressor->mMetricsRecordRef.HasLabel(METRIC_LABEL_KEY_COMPONENT_NAME, METRIC_LABEL_VALUE_COMPONENT_NAME_COMPRESSOR)); diff --git a/core/unittest/monitor/LogtailMetricUnittest.cpp b/core/unittest/monitor/LogtailMetricUnittest.cpp index 9a1c680f51..30d5822e47 100644 --- a/core/unittest/monitor/LogtailMetricUnittest.cpp +++ b/core/unittest/monitor/LogtailMetricUnittest.cpp @@ -54,7 +54,7 @@ void ILogtailMetricUnittest::TestCreateMetricAutoDelete() { labels.emplace_back(std::make_pair("region", "cn-hangzhou")); MetricsRecordRef fileMetric; - WriteMetrics::GetInstance()->PrepareMetricsRecordRef(fileMetric, std::move(labels)); + WriteMetrics::GetInstance()->PrepareMetricsRecordRef(fileMetric, 
MetricCategory::METRIC_CATEGORY_UNKNOWN, std::move(labels)); APSARA_TEST_EQUAL(fileMetric->GetLabels()->size(), 3); @@ -92,7 +92,7 @@ void ILogtailMetricUnittest::TestCreateMetricAutoDelete() { labels.emplace_back(std::make_pair("region", "cn-hangzhou")); MetricsRecordRef fileMetric2; - WriteMetrics::GetInstance()->PrepareMetricsRecordRef(fileMetric2, std::move(labels)); + WriteMetrics::GetInstance()->PrepareMetricsRecordRef(fileMetric2, MetricCategory::METRIC_CATEGORY_UNKNOWN, std::move(labels)); CounterPtr fileCounter2 = fileMetric2.CreateCounter("filed2"); fileCounter2->Add(222UL); } @@ -103,7 +103,7 @@ void ILogtailMetricUnittest::TestCreateMetricAutoDelete() { labels.emplace_back(std::make_pair("logstore", "logstore1")); labels.emplace_back(std::make_pair("region", "cn-hangzhou")); MetricsRecordRef fileMetric3; - WriteMetrics::GetInstance()->PrepareMetricsRecordRef(fileMetric3, std::move(labels)); + WriteMetrics::GetInstance()->PrepareMetricsRecordRef(fileMetric3, MetricCategory::METRIC_CATEGORY_UNKNOWN, std::move(labels)); CounterPtr fileCounter3 = fileMetric3.CreateCounter("filed3"); fileCounter3->Add(333UL); } @@ -144,7 +144,7 @@ void createMetrics(int count) { labels.emplace_back(std::make_pair("count", std::to_string(count))); labels.emplace_back(std::make_pair("region", "cn-beijing")); MetricsRecordRef fileMetric; - WriteMetrics::GetInstance()->PrepareMetricsRecordRef(fileMetric, std::move(labels)); + WriteMetrics::GetInstance()->PrepareMetricsRecordRef(fileMetric, MetricCategory::METRIC_CATEGORY_UNKNOWN, std::move(labels)); CounterPtr fileCounter = fileMetric.CreateCounter("filed1"); fileCounter->Add(111UL); } @@ -212,7 +212,7 @@ void ILogtailMetricUnittest::TestCreateAndDeleteMetric() { labels.emplace_back(std::make_pair("project", "test1")); labels.emplace_back(std::make_pair("logstore", "test1")); labels.emplace_back(std::make_pair("region", "cn-beijing")); - WriteMetrics::GetInstance()->PrepareMetricsRecordRef(*fileMetric1, std::move(labels)); + 
WriteMetrics::GetInstance()->PrepareMetricsRecordRef(*fileMetric1, MetricCategory::METRIC_CATEGORY_UNKNOWN, std::move(labels)); CounterPtr fileCounter = fileMetric1->CreateCounter("filed1"); fileCounter->Add(111UL); @@ -221,7 +221,7 @@ void ILogtailMetricUnittest::TestCreateAndDeleteMetric() { labels.emplace_back(std::make_pair("project", "test2")); labels.emplace_back(std::make_pair("logstore", "test2")); labels.emplace_back(std::make_pair("region", "cn-beijing")); - WriteMetrics::GetInstance()->PrepareMetricsRecordRef(*fileMetric2, std::move(labels)); + WriteMetrics::GetInstance()->PrepareMetricsRecordRef(*fileMetric2, MetricCategory::METRIC_CATEGORY_UNKNOWN, std::move(labels)); CounterPtr fileCounter = fileMetric2->CreateCounter("filed1"); fileCounter->Add(111UL); } @@ -231,7 +231,7 @@ void ILogtailMetricUnittest::TestCreateAndDeleteMetric() { labels.emplace_back(std::make_pair("project", "test3")); labels.emplace_back(std::make_pair("logstore", "test3")); labels.emplace_back(std::make_pair("region", "cn-beijing")); - WriteMetrics::GetInstance()->PrepareMetricsRecordRef(*fileMetric3, std::move(labels)); + WriteMetrics::GetInstance()->PrepareMetricsRecordRef(*fileMetric3, MetricCategory::METRIC_CATEGORY_UNKNOWN, std::move(labels)); CounterPtr fileCounter = fileMetric3->CreateCounter("filed1"); fileCounter->Add(111UL); } diff --git a/core/unittest/monitor/PluginMetricManagerUnittest.cpp b/core/unittest/monitor/PluginMetricManagerUnittest.cpp index 2e0db37022..41ef503c75 100644 --- a/core/unittest/monitor/PluginMetricManagerUnittest.cpp +++ b/core/unittest/monitor/PluginMetricManagerUnittest.cpp @@ -12,8 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include "monitor/metric_constants/MetricConstants.h" #include "monitor/PluginMetricManager.h" +#include "monitor/metric_constants/MetricConstants.h" #include "unittest/Unittest.h" namespace logtail { @@ -28,11 +28,12 @@ class PluginMetricManagerUnittest : public ::testing::Test { defaultLabels->emplace_back(METRIC_LABEL_KEY_PIPELINE_NAME, "default_config"); defaultLabels->emplace_back(METRIC_LABEL_KEY_PLUGIN_TYPE, "default_plugin"); defaultLabels->emplace_back(METRIC_LABEL_KEY_PLUGIN_ID, "default_id"); - WriteMetrics::GetInstance()->PrepareMetricsRecordRef(mMetricsRecordRef, std::move(*defaultLabels)); + WriteMetrics::GetInstance()->PrepareMetricsRecordRef(mMetricsRecordRef, MetricCategory::METRIC_CATEGORY_UNKNOWN, std::move(*defaultLabels)); std::unordered_map metricKeys; metricKeys.emplace("default_counter", MetricType::METRIC_TYPE_COUNTER); metricKeys.emplace("default_gauge", MetricType::METRIC_TYPE_INT_GAUGE); - pluginMetricManager = std::make_shared(mMetricsRecordRef->GetLabels(), metricKeys); + pluginMetricManager = std::make_shared( + mMetricsRecordRef->GetLabels(), metricKeys, MetricCategory::METRIC_CATEGORY_PLUGIN_SOURCE); } void TearDown() {} diff --git a/core/unittest/pipeline/PipelineUnittest.cpp b/core/unittest/pipeline/PipelineUnittest.cpp index 396e84627c..d8a2581a62 100644 --- a/core/unittest/pipeline/PipelineUnittest.cpp +++ b/core/unittest/pipeline/PipelineUnittest.cpp @@ -124,7 +124,7 @@ void PipelineUnittest::OnSuccessfulInit() const { APSARA_TEST_EQUAL(QueueKeyManager::GetInstance()->GetKey("test_config-flusher_sls-test_project#test_logstore"), pipeline->GetContext().GetLogstoreKey()); APSARA_TEST_EQUAL(0, pipeline->mInProcessCnt.load()); - APSARA_TEST_EQUAL(3U, pipeline->mMetricsRecordRef->GetLabels()->size()); + APSARA_TEST_EQUAL(2U, pipeline->mMetricsRecordRef->GetLabels()->size()); APSARA_TEST_TRUE(pipeline->mMetricsRecordRef.HasLabel(METRIC_LABEL_KEY_PIPELINE_NAME, configName)); 
APSARA_TEST_TRUE(pipeline->mMetricsRecordRef.HasLabel(METRIC_LABEL_KEY_PROJECT, "test_project")); @@ -2702,7 +2702,7 @@ void PipelineUnittest::TestProcess() const { processor->Init(Json::Value(), ctx); pipeline.mProcessorLine.emplace_back(std::move(processor)); - WriteMetrics::GetInstance()->PrepareMetricsRecordRef(pipeline.mMetricsRecordRef, {}); + WriteMetrics::GetInstance()->PrepareMetricsRecordRef(pipeline.mMetricsRecordRef, MetricCategory::METRIC_CATEGORY_UNKNOWN, {}); pipeline.mProcessorsInEventsTotal = pipeline.mMetricsRecordRef.CreateCounter(METRIC_PIPELINE_PROCESSORS_IN_EVENTS_TOTAL); pipeline.mProcessorsInGroupsTotal @@ -2750,7 +2750,7 @@ void PipelineUnittest::TestSend() const { configs.emplace_back(1, nullptr); pipeline.mRouter.Init(configs, ctx); - WriteMetrics::GetInstance()->PrepareMetricsRecordRef(pipeline.mMetricsRecordRef, {}); + WriteMetrics::GetInstance()->PrepareMetricsRecordRef(pipeline.mMetricsRecordRef, MetricCategory::METRIC_CATEGORY_UNKNOWN, {}); pipeline.mFlushersInGroupsTotal = pipeline.mMetricsRecordRef.CreateCounter(METRIC_PIPELINE_FLUSHERS_IN_EVENT_GROUPS_TOTAL); pipeline.mFlushersInEventsTotal @@ -2816,7 +2816,7 @@ void PipelineUnittest::TestSend() const { configs.emplace_back(configJson.size(), nullptr); pipeline.mRouter.Init(configs, ctx); - WriteMetrics::GetInstance()->PrepareMetricsRecordRef(pipeline.mMetricsRecordRef, {}); + WriteMetrics::GetInstance()->PrepareMetricsRecordRef(pipeline.mMetricsRecordRef, MetricCategory::METRIC_CATEGORY_UNKNOWN, {}); pipeline.mFlushersInGroupsTotal = pipeline.mMetricsRecordRef.CreateCounter(METRIC_PIPELINE_FLUSHERS_IN_EVENT_GROUPS_TOTAL); pipeline.mFlushersInEventsTotal diff --git a/core/unittest/prometheus/PromSelfMonitorUnittest.cpp b/core/unittest/prometheus/PromSelfMonitorUnittest.cpp index 45501ca356..b262b7d8ec 100644 --- a/core/unittest/prometheus/PromSelfMonitorUnittest.cpp +++ b/core/unittest/prometheus/PromSelfMonitorUnittest.cpp @@ -23,7 +23,7 @@ void 
PromSelfMonitorUnittest::TestCounterAdd() { // check result auto metric = selfMonitor->mPromStatusMap["2XX"]->GetCounter(METRIC_PLUGIN_PROM_SUBSCRIBE_TOTAL); - APSARA_TEST_EQUAL("plugin_prom_subscribe_total", metric->GetName()); + APSARA_TEST_EQUAL("prom_subscribe_total", metric->GetName()); APSARA_TEST_EQUAL(999ULL, metric->GetValue()); selfMonitor->AddCounter(METRIC_PLUGIN_PROM_SUBSCRIBE_TOTAL, 200); APSARA_TEST_EQUAL(1000ULL, metric->GetValue()); @@ -41,7 +41,7 @@ void PromSelfMonitorUnittest::TestIntGaugeSet() { // check result auto metric = selfMonitor->mPromStatusMap["2XX"]->GetIntGauge(METRIC_PLUGIN_PROM_SUBSCRIBE_TARGETS); - APSARA_TEST_EQUAL("plugin_prom_subscribe_targets", metric->GetName()); + APSARA_TEST_EQUAL("prom_subscribe_targets", metric->GetName()); APSARA_TEST_EQUAL(999ULL, metric->GetValue()); selfMonitor->SetIntGauge(METRIC_PLUGIN_PROM_SUBSCRIBE_TARGETS, 200, 0); APSARA_TEST_EQUAL(0ULL, metric->GetValue()); diff --git a/core/unittest/queue/BoundedProcessQueueUnittest.cpp b/core/unittest/queue/BoundedProcessQueueUnittest.cpp index cb14522036..08a178b207 100644 --- a/core/unittest/queue/BoundedProcessQueueUnittest.cpp +++ b/core/unittest/queue/BoundedProcessQueueUnittest.cpp @@ -118,7 +118,7 @@ void BoundedProcessQueueUnittest::TestPop() { } void BoundedProcessQueueUnittest::TestMetric() { - APSARA_TEST_EQUAL(5U, mQueue->mMetricsRecordRef->GetLabels()->size()); + APSARA_TEST_EQUAL(4U, mQueue->mMetricsRecordRef->GetLabels()->size()); APSARA_TEST_TRUE(mQueue->mMetricsRecordRef.HasLabel(METRIC_LABEL_KEY_PROJECT, "")); APSARA_TEST_TRUE(mQueue->mMetricsRecordRef.HasLabel(METRIC_LABEL_KEY_PIPELINE_NAME, "test_config")); APSARA_TEST_TRUE(mQueue->mMetricsRecordRef.HasLabel(METRIC_LABEL_KEY_COMPONENT_NAME, METRIC_LABEL_VALUE_COMPONENT_NAME_PROCESS_QUEUE)); diff --git a/core/unittest/queue/CircularProcessQueueUnittest.cpp b/core/unittest/queue/CircularProcessQueueUnittest.cpp index 3708fdc309..cd80c823ad 100644 --- 
a/core/unittest/queue/CircularProcessQueueUnittest.cpp +++ b/core/unittest/queue/CircularProcessQueueUnittest.cpp @@ -151,7 +151,7 @@ void CircularProcessQueueUnittest::TestReset() { } void CircularProcessQueueUnittest::TestMetric() { - APSARA_TEST_EQUAL(5U, mQueue->mMetricsRecordRef->GetLabels()->size()); + APSARA_TEST_EQUAL(4U, mQueue->mMetricsRecordRef->GetLabels()->size()); APSARA_TEST_TRUE(mQueue->mMetricsRecordRef.HasLabel(METRIC_LABEL_KEY_PROJECT, "")); APSARA_TEST_TRUE(mQueue->mMetricsRecordRef.HasLabel(METRIC_LABEL_KEY_PIPELINE_NAME, "test_config")); APSARA_TEST_TRUE(mQueue->mMetricsRecordRef.HasLabel(METRIC_LABEL_KEY_COMPONENT_NAME, METRIC_LABEL_VALUE_COMPONENT_NAME_PROCESS_QUEUE)); diff --git a/core/unittest/queue/SenderQueueUnittest.cpp b/core/unittest/queue/SenderQueueUnittest.cpp index b31c68edda..78cfa287d6 100644 --- a/core/unittest/queue/SenderQueueUnittest.cpp +++ b/core/unittest/queue/SenderQueueUnittest.cpp @@ -174,7 +174,7 @@ void SenderQueueUnittest::TestGetAvailableItems() { } void SenderQueueUnittest::TestMetric() { - APSARA_TEST_EQUAL(6U, mQueue->mMetricsRecordRef->GetLabels()->size()); + APSARA_TEST_EQUAL(5U, mQueue->mMetricsRecordRef->GetLabels()->size()); APSARA_TEST_TRUE(mQueue->mMetricsRecordRef.HasLabel(METRIC_LABEL_KEY_PROJECT, "")); APSARA_TEST_TRUE(mQueue->mMetricsRecordRef.HasLabel(METRIC_LABEL_KEY_PIPELINE_NAME, "test_config")); APSARA_TEST_TRUE(mQueue->mMetricsRecordRef.HasLabel(METRIC_LABEL_KEY_COMPONENT_NAME, METRIC_LABEL_VALUE_COMPONENT_NAME_SENDER_QUEUE)); diff --git a/core/unittest/route/RouterUnittest.cpp b/core/unittest/route/RouterUnittest.cpp index 4d7ae94531..edc78c7a0b 100644 --- a/core/unittest/route/RouterUnittest.cpp +++ b/core/unittest/route/RouterUnittest.cpp @@ -169,7 +169,7 @@ void RouterUnittest::TestMetric() { Router router; router.Init(configs, ctx); - APSARA_TEST_EQUAL(4U, router.mMetricsRecordRef->GetLabels()->size()); + APSARA_TEST_EQUAL(3U, router.mMetricsRecordRef->GetLabels()->size()); 
APSARA_TEST_TRUE(router.mMetricsRecordRef.HasLabel(METRIC_LABEL_KEY_PROJECT, "")); APSARA_TEST_TRUE(router.mMetricsRecordRef.HasLabel(METRIC_LABEL_KEY_PIPELINE_NAME, "test_config")); APSARA_TEST_TRUE( diff --git a/pkg/helper/k8smeta/k8s_meta_manager.go b/pkg/helper/k8smeta/k8s_meta_manager.go index 017e57782e..1432b1561e 100644 --- a/pkg/helper/k8smeta/k8s_meta_manager.go +++ b/pkg/helper/k8smeta/k8s_meta_manager.go @@ -226,18 +226,18 @@ func GetMetaManagerMetrics() []map[string]string { manager.metricRecord.Labels = []pipeline.Label{ { Key: helper.MetricLabelKeyMetricCategory, - Value: helper.MetricLabelKeyMetricCategoryRunner, + Value: helper.MetricLabelValueMetricCategoryRunner, }, { - Key: "cluster_id", + Key: helper.MetricLabelKeyClusterID, Value: *flags.ClusterID, }, { - Key: "runner_name", - Value: "k8s_meta_manager", + Key: helper.MetricLabelKeyRunnerName, + Value: helper.MetricLabelValueRunnerNameK8sMeta, }, { - Key: "project", + Key: helper.MetricLabelKeyProject, Value: strings.Join(projectName, " "), }, } diff --git a/pkg/helper/self_metrics_agent_constants.go b/pkg/helper/self_metrics_agent_constants.go index 52ed601c2e..e236e748da 100644 --- a/pkg/helper/self_metrics_agent_constants.go +++ b/pkg/helper/self_metrics_agent_constants.go @@ -20,6 +20,6 @@ package helper // metric keys const ( - MetricAgentMemoryGo = "agent_go_memory_used_mb" - MetricAgentGoRoutinesTotal = "agent_go_routines_total" + MetricAgentMemoryGo = "go_memory_used_mb" + MetricAgentGoRoutinesTotal = "go_routines_total" ) diff --git a/pkg/helper/self_metrics_plugin_constants.go b/pkg/helper/self_metrics_plugin_constants.go index e7638b215f..4233904edf 100644 --- a/pkg/helper/self_metrics_plugin_constants.go +++ b/pkg/helper/self_metrics_plugin_constants.go @@ -32,33 +32,33 @@ const ( // label values const ( - MetricLabelKeyMetricCategoryPlugin = "plugin" + MetricLabelValueMetricCategoryPlugin = "plugin" ) // metric keys const ( - MetricPluginInEventsTotal = "plugin_in_events_total" - 
MetricPluginInEventGroupsTotal = "plugin_in_event_groups_total" - MetricPluginInSizeBytes = "plugin_in_size_bytes" - MetricPluginOutEventsTotal = "plugin_out_events_total" - MetricPluginOutEventGroupsTotal = "plugin_out_event_groups_total" - MetricPluginOutSizeBytes = "plugin_out_size_bytes" - MetricPluginTotalDelayMs = "plugin_total_delay_ms" - MetricPluginTotalProcessTimeMs = "plugin_total_process_time_ms" + MetricPluginInEventsTotal = "in_events_total" + MetricPluginInEventGroupsTotal = "in_event_groups_total" + MetricPluginInSizeBytes = "in_size_bytes" + MetricPluginOutEventsTotal = "out_events_total" + MetricPluginOutEventGroupsTotal = "out_event_groups_total" + MetricPluginOutSizeBytes = "out_size_bytes" + MetricPluginTotalDelayMs = "total_delay_ms" + MetricPluginTotalProcessTimeMs = "total_process_time_ms" ) /********************************************************** * input_canal **********************************************************/ const ( - MetricPluginBinlogRotate = "plugin_binlog_rotate" - MetricPluginBinlogSync = "plugin_binlog_sync" - MetricPluginBinlogDdl = "plugin_binlog_ddl" - MetricPluginBinlogRow = "plugin_binlog_row" - MetricPluginBinlogXgid = "plugin_binlog_xgid" - MetricPluginBinlogCheckpoint = "plugin_binlog_checkpoint" - MetricPluginBinlogFilename = "plugin_binlog_filename" - MetricPluginBinlogGtid = "plugin_binlog_gtid" + MetricPluginBinlogRotate = "binlog_rotate" + MetricPluginBinlogSync = "binlog_sync" + MetricPluginBinlogDdl = "binlog_ddl" + MetricPluginBinlogRow = "binlog_row" + MetricPluginBinlogXgid = "binlog_xgid" + MetricPluginBinlogCheckpoint = "binlog_checkpoint" + MetricPluginBinlogFilename = "binlog_filename" + MetricPluginBinlogGtid = "binlog_gtid" ) /********************************************************** @@ -66,10 +66,10 @@ const ( * service_docker_stdout_v2 **********************************************************/ const ( - MetricPluginContainerTotal = "plugin_container_total" - MetricPluginAddContainerTotal = 
"plugin_add_container_total" - MetricPluginRemoveContainerTotal = "plugin_remove_container_total" - MetricPluginUpdateContainerTotal = "plugin_update_container_total" + MetricPluginContainerTotal = "container_total" + MetricPluginAddContainerTotal = "add_container_total" + MetricPluginRemoveContainerTotal = "remove_container_total" + MetricPluginUpdateContainerTotal = "update_container_total" ) /********************************************************** @@ -77,26 +77,26 @@ const ( * service_rdb **********************************************************/ const ( - MetricPluginCollectAvgCostTimeMs = "plugin_collect_avg_cost_time_ms" - MetricPluginCollectTotal = "plugin_collect_total" + MetricPluginCollectAvgCostTimeMs = "collect_avg_cost_time_ms" + MetricPluginCollectTotal = "collect_total" ) /********************************************************** * service_k8s_meta **********************************************************/ const ( - MetricCollectEntityTotal = "plugin_collect_entity_total" - MetricCollectLinkTotal = "plugin_collect_link_total" + MetricCollectEntityTotal = "collect_entity_total" + MetricCollectLinkTotal = "collect_link_total" ) /********************************************************** * all processor(所有解析类的处理插件通用指标。Todo:目前统计还不全、不准确) **********************************************************/ const ( - MetricPluginDiscardedEventsTotal = "plugin_discarded_events_total" - MetricPluginOutFailedEventsTotal = "plugin_out_failed_events_total" - MetricPluginOutKeyNotFoundEventsTotal = "plugin_out_key_not_found_events_total" - MetricPluginOutSuccessfulEventsTotal = "plugin_out_successful_events_total" + MetricPluginDiscardedEventsTotal = "discarded_events_total" + MetricPluginOutFailedEventsTotal = "out_failed_events_total" + MetricPluginOutKeyNotFoundEventsTotal = "out_key_not_found_events_total" + MetricPluginOutSuccessfulEventsTotal = "out_successful_events_total" ) /********************************************************** @@ -105,12 +105,12 @@ 
const ( * processor_string_replace **********************************************************/ const ( - PluginPairsPerLogTotal = "plugin_pairs_per_log_total" + PluginPairsPerLogTotal = "pairs_per_log_total" ) func GetPluginCommonLabels(context pipeline.Context, pluginMeta *pipeline.PluginMeta) []pipeline.LabelPair { labels := make([]pipeline.LabelPair, 0) - labels = append(labels, pipeline.LabelPair{Key: MetricLabelKeyMetricCategory, Value: MetricLabelKeyMetricCategoryPlugin}) + labels = append(labels, pipeline.LabelPair{Key: MetricLabelKeyMetricCategory, Value: MetricLabelValueMetricCategoryPlugin}) labels = append(labels, pipeline.LabelPair{Key: MetricLabelKeyProject, Value: context.GetProject()}) labels = append(labels, pipeline.LabelPair{Key: MetricLabelKeyLogstore, Value: context.GetLogstore()}) labels = append(labels, pipeline.LabelPair{Key: MetricLabelKeyPipelineName, Value: context.GetConfigName()}) diff --git a/pkg/helper/self_metrics_runner_constants.go b/pkg/helper/self_metrics_runner_constants.go index 317ac7f23d..fedcb15899 100644 --- a/pkg/helper/self_metrics_runner_constants.go +++ b/pkg/helper/self_metrics_runner_constants.go @@ -18,21 +18,38 @@ package helper // runner ////////////////////////////////////////////////////////////////////////// +// lebel keys +const ( + MetricLabelKeyRunnerName = "runner_name" +) + // label values const ( - MetricLabelKeyMetricCategoryRunner = "runner" + MetricLabelValueMetricCategoryRunner = "runner" ) /********************************************************** * k8s meta **********************************************************/ + +// label keys +const ( + MetricLabelKeyClusterID = "cluster_id" +) + +// label values +const ( + MetricLabelValueRunnerNameK8sMeta = "k8s_meta" +) + +// metric keys const ( - MetricRunnerK8sMetaAddEventTotal = "runner_k8s_meta_add_event_total" - MetricRunnerK8sMetaUpdateEventTotal = "runner_k8s_meta_update_event_total" - MetricRunnerK8sMetaDeleteEventTotal = 
"runner_k8s_meta_delete_event_total" - MetricRunnerK8sMetaCacheSize = "runner_k8s_meta_cache_size" - MetricRunnerK8sMetaQueueSize = "runner_k8s_meta_queue_size" - MetricRunnerK8sMetaHTTPRequestTotal = "runner_k8s_meta_http_request_total" - MetricRunnerK8sMetaHTTPAvgDelayMs = "runner_k8s_meta_avg_delay_ms" - MetricRunnerK8sMetaHTTPMaxDelayMs = "runner_k8s_meta_max_delay_ms" + MetricRunnerK8sMetaAddEventTotal = "add_event_total" + MetricRunnerK8sMetaUpdateEventTotal = "update_event_total" + MetricRunnerK8sMetaDeleteEventTotal = "delete_event_total" + MetricRunnerK8sMetaCacheSize = "cache_size" + MetricRunnerK8sMetaQueueSize = "queue_size" + MetricRunnerK8sMetaHTTPRequestTotal = "http_request_total" + MetricRunnerK8sMetaHTTPAvgDelayMs = "avg_delay_ms" + MetricRunnerK8sMetaHTTPMaxDelayMs = "max_delay_ms" ) From 9dd05927d6b00d59ed49afc32c7b04ab4fbaeb0f Mon Sep 17 00:00:00 2001 From: dog Date: Wed, 13 Nov 2024 10:32:33 +0800 Subject: [PATCH 07/10] feat: build EventGroup in libCurl Callback (#1860) --- core/prometheus/async/PromHttpRequest.cpp | 46 +------- core/prometheus/async/PromHttpRequest.h | 9 +- core/prometheus/labels/TextParser.cpp | 18 --- core/prometheus/labels/TextParser.h | 1 - .../prometheus/schedulers/ScrapeScheduler.cpp | 82 ++++++++++---- core/prometheus/schedulers/ScrapeScheduler.h | 25 ++++- .../schedulers/TargetSubscriberScheduler.cpp | 6 +- core/unittest/prometheus/PromAsynUnittest.cpp | 2 +- .../prometheus/ScrapeSchedulerUnittest.cpp | 105 ++++++++++++------ 9 files changed, 159 insertions(+), 135 deletions(-) diff --git a/core/prometheus/async/PromHttpRequest.cpp b/core/prometheus/async/PromHttpRequest.cpp index f2bff245d5..cf7001f270 100644 --- a/core/prometheus/async/PromHttpRequest.cpp +++ b/core/prometheus/async/PromHttpRequest.cpp @@ -5,49 +5,9 @@ #include #include "common/http/HttpRequest.h" -#include "prometheus/Constants.h" namespace logtail { -// size_t PromWriteCallback(char* buffer, size_t size, size_t nmemb, void* data) { -// unsigned 
long sizes = size * nmemb; - -// if (buffer == nullptr) { -// return 0; -// } - -// PromResponseBody* body = static_cast(data); - -// size_t begin = 0; -// while (begin < sizes) { -// for (size_t end = begin; end < sizes; ++end) { -// if (buffer[end] == '\n') { -// if (begin == 0) { -// body->mCache.append(buffer, end); -// if (!body->mCache.empty()) { -// auto e = body->mEventGroup.AddLogEvent(); -// auto sb = body->mEventGroup.GetSourceBuffer()->CopyString(body->mCache); -// body->mCache.clear(); -// e->SetContentNoCopy(prometheus::PROMETHEUS, StringView(sb.data, sb.size)); -// } -// } else if (begin != end) { -// auto e = body->mEventGroup.AddLogEvent(); -// auto sb = body->mEventGroup.GetSourceBuffer()->CopyString(buffer + begin, end - begin); -// e->SetContentNoCopy(prometheus::PROMETHEUS, StringView(sb.data, sb.size)); -// } -// begin += end - begin + 1; -// continue; -// } -// } -// break; -// } -// if (begin < sizes) { -// body->mCache.append(buffer + begin, sizes - begin); -// } -// body->mRawSize += sizes; -// return sizes; -// } - PromHttpRequest::PromHttpRequest(const std::string& method, bool httpsFlag, const std::string& host, @@ -56,6 +16,7 @@ PromHttpRequest::PromHttpRequest(const std::string& method, const std::string& query, const std::map& header, const std::string& body, + HttpResponse&& response, uint32_t timeout, uint32_t maxTryCnt, std::shared_ptr> future, @@ -68,10 +29,7 @@ PromHttpRequest::PromHttpRequest(const std::string& method, query, header, body, - // HttpResponse( - // new PromResponseBody(), [](void* ptr) { delete static_cast(ptr); }, - // PromWriteCallback), - HttpResponse(), + std::move(response), timeout, maxTryCnt), mFuture(std::move(future)), diff --git a/core/prometheus/async/PromHttpRequest.h b/core/prometheus/async/PromHttpRequest.h index 74f8db719d..e54203eb11 100644 --- a/core/prometheus/async/PromHttpRequest.h +++ b/core/prometheus/async/PromHttpRequest.h @@ -19,6 +19,7 @@ class PromHttpRequest : public AsynHttpRequest { 
const std::string& query, const std::map& header, const std::string& body, + HttpResponse&& response, uint32_t timeout, uint32_t maxTryCnt, std::shared_ptr> future, @@ -36,12 +37,4 @@ class PromHttpRequest : public AsynHttpRequest { std::shared_ptr> mIsContextValidFuture; }; -struct PromResponseBody { - PipelineEventGroup mEventGroup; - std::string mCache; - size_t mRawSize = 0; - - PromResponseBody() : mEventGroup(std::make_shared()) {}; -}; - } // namespace logtail \ No newline at end of file diff --git a/core/prometheus/labels/TextParser.cpp b/core/prometheus/labels/TextParser.cpp index f0dcbe678b..a3ecb394ab 100644 --- a/core/prometheus/labels/TextParser.cpp +++ b/core/prometheus/labels/TextParser.cpp @@ -68,24 +68,6 @@ PipelineEventGroup TextParser::Parse(const string& content, uint64_t defaultTime return eGroup; } -PipelineEventGroup TextParser::BuildLogGroup(const string& content) { - PipelineEventGroup eGroup(std::make_shared()); - - vector lines; - // pre-reserve vector size by 1024 which is experience value per line - lines.reserve(content.size() / 1024); - SplitStringView(content, '\n', lines); - for (const auto& line : lines) { - if (!IsValidMetric(line)) { - continue; - } - auto* logEvent = eGroup.AddLogEvent(); - logEvent->SetContent(prometheus::PROMETHEUS, line); - } - - return eGroup; -} - bool TextParser::ParseLine(StringView line, MetricEvent& metricEvent) { mLine = line; mPos = 0; diff --git a/core/prometheus/labels/TextParser.h b/core/prometheus/labels/TextParser.h index e8f7b86bd4..0eb8899667 100644 --- a/core/prometheus/labels/TextParser.h +++ b/core/prometheus/labels/TextParser.h @@ -33,7 +33,6 @@ class TextParser { void SetDefaultTimestamp(uint64_t defaultTimestamp, uint32_t defaultNanoSec); PipelineEventGroup Parse(const std::string& content, uint64_t defaultTimestamp, uint32_t defaultNanoSec); - PipelineEventGroup BuildLogGroup(const std::string& content); bool ParseLine(StringView line, MetricEvent& metricEvent); diff --git 
a/core/prometheus/schedulers/ScrapeScheduler.cpp b/core/prometheus/schedulers/ScrapeScheduler.cpp index 0908cb6ea7..a830558227 100644 --- a/core/prometheus/schedulers/ScrapeScheduler.cpp +++ b/core/prometheus/schedulers/ScrapeScheduler.cpp @@ -30,6 +30,7 @@ #include "pipeline/queue/ProcessQueueManager.h" #include "pipeline/queue/QueueKey.h" #include "prometheus/Constants.h" +#include "prometheus/Utils.h" #include "prometheus/async/PromFuture.h" #include "prometheus/async/PromHttpRequest.h" #include "sdk/Common.h" @@ -38,6 +39,35 @@ using namespace std; namespace logtail { +size_t PromMetricWriteCallback(char* buffer, size_t size, size_t nmemb, void* data) { + uint64_t sizes = size * nmemb; + + if (buffer == nullptr || data == nullptr) { + return 0; + } + + auto* body = static_cast(data); + + size_t begin = 0; + for (size_t end = begin; end < sizes; ++end) { + if (buffer[end] == '\n') { + if (begin == 0 && !body->mCache.empty()) { + body->mCache.append(buffer, end); + body->AddEvent(body->mCache.data(), body->mCache.size()); + body->mCache.clear(); + } else if (begin != end) { + body->AddEvent(buffer + begin, end - begin); + } + begin = end + 1; + } + } + if (begin < sizes) { + body->mCache.append(buffer + begin, sizes - begin); + } + body->mRawSize += sizes; + return sizes; +} + ScrapeScheduler::ScrapeScheduler(std::shared_ptr scrapeConfigPtr, std::string host, int32_t port, @@ -61,15 +91,17 @@ ScrapeScheduler::ScrapeScheduler(std::shared_ptr scrapeConfigPtr, } void ScrapeScheduler::OnMetricResult(HttpResponse& response, uint64_t timestampMilliSec) { - auto& responseBody = *response.GetBody(); + auto& responseBody = *response.GetBody(); + responseBody.FlushCache(); mSelfMonitor->AddCounter(METRIC_PLUGIN_OUT_EVENTS_TOTAL, response.GetStatusCode()); - mSelfMonitor->AddCounter(METRIC_PLUGIN_OUT_SIZE_BYTES, response.GetStatusCode(), responseBody.size()); - mSelfMonitor->AddCounter( - METRIC_PLUGIN_PROM_SCRAPE_TIME_MS, response.GetStatusCode(), 
GetCurrentTimeInMilliSeconds() - timestampMilliSec); + mSelfMonitor->AddCounter(METRIC_PLUGIN_OUT_SIZE_BYTES, response.GetStatusCode(), responseBody.mRawSize); + mSelfMonitor->AddCounter(METRIC_PLUGIN_PROM_SCRAPE_TIME_MS, + response.GetStatusCode(), + GetCurrentTimeInMilliSeconds() - timestampMilliSec); mScrapeTimestampMilliSec = timestampMilliSec; mScrapeDurationSeconds = 1.0 * (GetCurrentTimeInMilliSeconds() - timestampMilliSec) / 1000; - mScrapeResponseSizeBytes = responseBody.size(); + mScrapeResponseSizeBytes = responseBody.mRawSize; mUpState = response.GetStatusCode() == 200; if (response.GetStatusCode() != 200) { mScrapeResponseSizeBytes = 0; @@ -77,10 +109,11 @@ void ScrapeScheduler::OnMetricResult(HttpResponse& response, uint64_t timestampM for (const auto& [k, v] : mScrapeConfigPtr->mRequestHeaders) { headerStr.append(k).append(":").append(v).append(";"); } - LOG_WARNING(sLogger, - ("scrape failed, status code", response.GetStatusCode())("target", mHash)("http header", headerStr)); + LOG_WARNING( + sLogger, + ("scrape failed, status code", response.GetStatusCode())("target", mHash)("http header", headerStr)); } - auto eventGroup = BuildPipelineEventGroup(responseBody); + auto& eventGroup = responseBody.mEventGroup; SetAutoMetricMeta(eventGroup); SetTargetLabels(eventGroup); @@ -99,10 +132,6 @@ void ScrapeScheduler::SetTargetLabels(PipelineEventGroup& eGroup) { mTargetLabels.Range([&eGroup](const std::string& key, const std::string& value) { eGroup.SetTag(key, value); }); } -PipelineEventGroup ScrapeScheduler::BuildPipelineEventGroup(const std::string& content) { - return mParser->BuildLogGroup(content); -} - void ScrapeScheduler::PushEventGroup(PipelineEventGroup&& eGroup) { auto item = make_unique(std::move(eGroup), mInputIndex); #ifdef APSARA_UNIT_TEST_MAIN @@ -175,18 +204,23 @@ std::unique_ptr ScrapeScheduler::BuildScrapeTimerEvent(std::chrono:: if (retry > 0) { retry -= 1; } - auto request = std::make_unique(sdk::HTTP_GET, - mScrapeConfigPtr->mScheme 
== prometheus::HTTPS, - mHost, - mPort, - mScrapeConfigPtr->mMetricsPath, - mScrapeConfigPtr->mQueryString, - mScrapeConfigPtr->mRequestHeaders, - "", - mScrapeConfigPtr->mScrapeTimeoutSeconds, - retry, - this->mFuture, - this->mIsContextValidFuture); + auto request + = std::make_unique(sdk::HTTP_GET, + mScrapeConfigPtr->mScheme == prometheus::HTTPS, + mHost, + mPort, + mScrapeConfigPtr->mMetricsPath, + mScrapeConfigPtr->mQueryString, + mScrapeConfigPtr->mRequestHeaders, + "", + HttpResponse( + new PromMetricResponseBody(), + [](void* ptr) { delete static_cast(ptr); }, + PromMetricWriteCallback), + mScrapeConfigPtr->mScrapeTimeoutSeconds, + retry, + this->mFuture, + this->mIsContextValidFuture); auto timerEvent = std::make_unique(execTime, std::move(request)); return timerEvent; } diff --git a/core/prometheus/schedulers/ScrapeScheduler.h b/core/prometheus/schedulers/ScrapeScheduler.h index c7c4e381b7..a9a526924d 100644 --- a/core/prometheus/schedulers/ScrapeScheduler.h +++ b/core/prometheus/schedulers/ScrapeScheduler.h @@ -25,7 +25,9 @@ #include "models/PipelineEventGroup.h" #include "monitor/LoongCollectorMetricTypes.h" #include "pipeline/queue/QueueKey.h" +#include "prometheus/Constants.h" #include "prometheus/PromSelfMonitor.h" +#include "prometheus/Utils.h" #include "prometheus/labels/TextParser.h" #include "prometheus/schedulers/ScrapeConfig.h" @@ -35,6 +37,27 @@ namespace logtail { +size_t PromMetricWriteCallback(char* buffer, size_t size, size_t nmemb, void* data); + +struct PromMetricResponseBody { + PipelineEventGroup mEventGroup; + std::string mCache; + size_t mRawSize = 0; + + PromMetricResponseBody() : mEventGroup(std::make_shared()) {}; + void AddEvent(char* line, size_t len) { + if (IsValidMetric(StringView(line, len))) { + auto* e = mEventGroup.AddLogEvent(); + auto sb = mEventGroup.GetSourceBuffer()->CopyString(line, len); + e->SetContentNoCopy(prometheus::PROMETHEUS, StringView(sb.data, sb.size)); + } + } + void FlushCache() { + 
AddEvent(mCache.data(), mCache.size()); + mCache.clear(); + } +}; + class ScrapeScheduler : public BaseScheduler { public: ScrapeScheduler(std::shared_ptr scrapeConfigPtr, @@ -61,8 +84,6 @@ class ScrapeScheduler : public BaseScheduler { void SetAutoMetricMeta(PipelineEventGroup& eGroup); void SetTargetLabels(PipelineEventGroup& eGroup); - PipelineEventGroup BuildPipelineEventGroup(const std::string& content); - std::unique_ptr BuildScrapeTimerEvent(std::chrono::steady_clock::time_point execTime); std::shared_ptr mScrapeConfigPtr; diff --git a/core/prometheus/schedulers/TargetSubscriberScheduler.cpp b/core/prometheus/schedulers/TargetSubscriberScheduler.cpp index d44948e469..178c952dba 100644 --- a/core/prometheus/schedulers/TargetSubscriberScheduler.cpp +++ b/core/prometheus/schedulers/TargetSubscriberScheduler.cpp @@ -59,8 +59,9 @@ bool TargetSubscriberScheduler::operator<(const TargetSubscriberScheduler& other void TargetSubscriberScheduler::OnSubscription(HttpResponse& response, uint64_t timestampMilliSec) { mSelfMonitor->AddCounter(METRIC_PLUGIN_PROM_SUBSCRIBE_TOTAL, response.GetStatusCode()); - mSelfMonitor->AddCounter( - METRIC_PLUGIN_PROM_SUBSCRIBE_TIME_MS, response.GetStatusCode(), GetCurrentTimeInMilliSeconds() - timestampMilliSec); + mSelfMonitor->AddCounter(METRIC_PLUGIN_PROM_SUBSCRIBE_TIME_MS, + response.GetStatusCode(), + GetCurrentTimeInMilliSeconds() - timestampMilliSec); if (response.GetStatusCode() == 304) { // not modified return; @@ -304,6 +305,7 @@ TargetSubscriberScheduler::BuildSubscriberTimerEvent(std::chrono::steady_clock:: "collector_id=" + mPodName, httpHeader, "", + HttpResponse(), prometheus::RefeshIntervalSeconds, 1, this->mFuture); diff --git a/core/unittest/prometheus/PromAsynUnittest.cpp b/core/unittest/prometheus/PromAsynUnittest.cpp index 625fb0e6bc..71a2212bda 100644 --- a/core/unittest/prometheus/PromAsynUnittest.cpp +++ b/core/unittest/prometheus/PromAsynUnittest.cpp @@ -24,7 +24,7 @@ void PromAsynUnittest::TestExecTime() { 
return true; }); auto request = std::make_shared( - "http", false, "127.0.0.1", 8080, "/", "", map(), "", 10, 3, future); + "http", false, "127.0.0.1", 8080, "/", "", map(), "", HttpResponse(), 10, 3, future); auto asynRequest = std::dynamic_pointer_cast(request); asynRequest->mLastSendTime = now; auto response = HttpResponse{}; diff --git a/core/unittest/prometheus/ScrapeSchedulerUnittest.cpp b/core/unittest/prometheus/ScrapeSchedulerUnittest.cpp index 80e2d5f5d7..d51cc4e57e 100644 --- a/core/unittest/prometheus/ScrapeSchedulerUnittest.cpp +++ b/core/unittest/prometheus/ScrapeSchedulerUnittest.cpp @@ -19,6 +19,7 @@ #include #include "common/StringTools.h" +#include "common/http/HttpResponse.h" #include "common/timer/Timer.h" #include "prometheus/Constants.h" #include "prometheus/async/PromFuture.h" @@ -35,7 +36,7 @@ class ScrapeSchedulerUnittest : public testing::Test { public: void TestInitscrapeScheduler(); void TestProcess(); - void TestSplitByLines(); + void TestStreamMetricWriteCallback(); void TestReceiveMessage(); void TestScheduler(); @@ -50,36 +51,10 @@ class ScrapeSchedulerUnittest : public testing::Test { mScrapeConfig->mScrapeTimeoutSeconds = 10; mScrapeConfig->mMetricsPath = "/metrics"; mScrapeConfig->mRequestHeaders = {{"Authorization", "Bearer xxxxx"}}; - - *mHttpResponse.GetBody() - = "# HELP go_gc_duration_seconds A summary of the pause duration of garbage collection cycles.\n" - "# TYPE go_gc_duration_seconds summary\n" - "go_gc_duration_seconds{quantile=\"0\"} 1.5531e-05\n" - "go_gc_duration_seconds{quantile=\"0.25\"} 3.9357e-05\n" - "go_gc_duration_seconds{quantile=\"0.5\"} 4.1114e-05\n" - "go_gc_duration_seconds{quantile=\"0.75\"} 4.3372e-05\n" - "go_gc_duration_seconds{quantile=\"1\"} 0.000112326\n" - "go_gc_duration_seconds_sum 0.034885631\n" - "go_gc_duration_seconds_count 850\n" - "# HELP go_goroutines Number of goroutines that currently exist.\n" - "# TYPE go_goroutines gauge\n" - "go_goroutines 7\n" - "# HELP go_info Information about 
the Go environment.\n" - "# TYPE go_info gauge\n" - "go_info{version=\"go1.22.3\"} 1\n" - "# HELP go_memstats_alloc_bytes Number of bytes allocated and still in use.\n" - "# TYPE go_memstats_alloc_bytes gauge\n" - "go_memstats_alloc_bytes 6.742688e+06\n" - "# HELP go_memstats_alloc_bytes_total Total number of bytes allocated, even if freed.\n" - "# TYPE go_memstats_alloc_bytes_total counter\n" - "go_memstats_alloc_bytes_total 1.5159292e+08"; - - mHttpResponse.SetStatusCode(200); } private: std::shared_ptr mScrapeConfig; - HttpResponse mHttpResponse; }; void ScrapeSchedulerUnittest::TestInitscrapeScheduler() { @@ -90,6 +65,8 @@ void ScrapeSchedulerUnittest::TestInitscrapeScheduler() { } void ScrapeSchedulerUnittest::TestProcess() { + HttpResponse httpResponse = HttpResponse( + new PromMetricResponseBody(), [](void* ptr) { delete static_cast(ptr); }, PromMetricWriteCallback); Labels labels; labels.Set(prometheus::ADDRESS_LABEL_NAME, "localhost:8080"); labels.Set(prometheus::ADDRESS_LABEL_NAME, "localhost:8080"); @@ -99,25 +76,77 @@ void ScrapeSchedulerUnittest::TestProcess() { APSARA_TEST_EQUAL(event.GetId(), "test_jobhttp://localhost:8080/metrics" + ToString(labels.Hash())); // if status code is not 200, no data will be processed // but will continue running, sending self-monitoring metrics - mHttpResponse.SetStatusCode(503); - event.OnMetricResult(mHttpResponse, 0); + httpResponse.SetStatusCode(503); + event.OnMetricResult(httpResponse, 0); APSARA_TEST_EQUAL(1UL, event.mItem.size()); event.mItem.clear(); - mHttpResponse.SetStatusCode(200); - event.OnMetricResult(mHttpResponse, 0); + httpResponse.GetBody()->mEventGroup = PipelineEventGroup(std::make_shared()); + httpResponse.SetStatusCode(200); + string body1 = "# HELP go_gc_duration_seconds A summary of the pause duration of garbage collection cycles.\n" + "# TYPE go_gc_duration_seconds summary\n" + "go_gc_duration_seconds{quantile=\"0\"} 1.5531e-05\n" + "go_gc_duration_seconds{quantile=\"0.25\"} 3.9357e-05\n" + 
"go_gc_duration_seconds{quantile=\"0.5\"} 4.1114e-05\n" + "go_gc_duration_seconds{quantile=\"0.75\"} 4.3372e-05\n" + "go_gc_duration_seconds{quantile=\"1\"} 0.000112326\n" + "go_gc_duration_seconds_sum 0.034885631\n" + "go_gc_duration_seconds_count 850\n" + "# HELP go_goroutines Number of goroutines that currently exist.\n" + "# TYPE go_goroutines gauge\n" + "go_goroutines 7\n" + "# HELP go_info Information about the Go environment.\n" + "# TYPE go_info gauge\n" + "go_info{version=\"go1.22.3\"} 1\n" + "# HELP go_memstats_alloc_bytes Number of bytes allocated and still in use.\n" + "# TYPE go_memstats_alloc_bytes gauge\n" + "go_memstats_alloc_bytes 6.742688e+06\n" + "# HELP go_memstats_alloc_bytes_total Total number of bytes allocated, even if freed.\n" + "# TYPE go_memstats_alloc_bytes_total counter\n" + "go_memstats_alloc_bytes_total 1.5159292e+08"; + PromMetricWriteCallback( + body1.data(), (size_t)1, (size_t)body1.length(), (void*)httpResponse.GetBody()); + event.OnMetricResult(httpResponse, 0); APSARA_TEST_EQUAL(1UL, event.mItem.size()); APSARA_TEST_EQUAL(11UL, event.mItem[0]->mEventGroup.GetEvents().size()); } -void ScrapeSchedulerUnittest::TestSplitByLines() { +void ScrapeSchedulerUnittest::TestStreamMetricWriteCallback() { + HttpResponse httpResponse = HttpResponse( + new PromMetricResponseBody(), [](void* ptr) { delete static_cast(ptr); }, PromMetricWriteCallback); Labels labels; labels.Set(prometheus::ADDRESS_LABEL_NAME, "localhost:8080"); labels.Set(prometheus::ADDRESS_LABEL_NAME, "localhost:8080"); ScrapeScheduler event(mScrapeConfig, "localhost", 8080, labels, 0, 0); APSARA_TEST_EQUAL(event.GetId(), "test_jobhttp://localhost:8080/metrics" + ToString(labels.Hash())); - auto res = event.BuildPipelineEventGroup(*mHttpResponse.GetBody()); - APSARA_TEST_EQUAL(11UL, res.GetEvents().size()); + + string body1 = "# HELP go_gc_duration_seconds A summary of the pause duration of garbage collection cycles.\n" + "# TYPE go_gc_duration_seconds summary\n" + 
"go_gc_duration_seconds{quantile=\"0\"} 1.5531e-05\n" + "go_gc_duration_seconds{quantile=\"0.25\"} 3.9357e-05\n" + "go_gc_duration_seconds{quantile=\"0.5\"} 4.1114e-05\n" + "go_gc_duration_seconds{quantile=\"0.75\"} 4.3372e-05\n" + "go_gc_duration_seconds{quantile=\"1\"} 0.000112326\n" + "go_gc_duration_seconds_sum 0.034885631\n" + "go_gc_duration_seconds_count 850\n" + "# HELP go_goroutines Number of goroutines t" + "hat currently exist.\n" + "# TYPE go_goroutines gauge\n" + "go_go"; + string body2 = "routines 7\n" + "# HELP go_info Information about the Go environment.\n" + "# TYPE go_info gauge\n" + "go_info{version=\"go1.22.3\"} 1\n" + "# HELP go_memstats_alloc_bytes Number of bytes allocated and still in use.\n" + "# TYPE go_memstats_alloc_bytes gauge\n" + "go_memstats_alloc_bytes 6.742688e+06\n" + "# HELP go_memstats_alloc_bytes_total Total number of bytes allocated, even if freed.\n" + "# TYPE go_memstats_alloc_bytes_total counter\n" + "go_memstats_alloc_bytes_total 1.5159292e+08"; + PromMetricWriteCallback( + body1.data(), (size_t)1, (size_t)body1.length(), (void*)httpResponse.GetBody()); + auto& res = httpResponse.GetBody()->mEventGroup; + APSARA_TEST_EQUAL(7UL, res.GetEvents().size()); APSARA_TEST_EQUAL("go_gc_duration_seconds{quantile=\"0\"} 1.5531e-05", res.GetEvents()[0].Cast().GetContent(prometheus::PROMETHEUS).to_string()); APSARA_TEST_EQUAL("go_gc_duration_seconds{quantile=\"0.25\"} 3.9357e-05", @@ -132,6 +161,12 @@ void ScrapeSchedulerUnittest::TestSplitByLines() { res.GetEvents()[5].Cast().GetContent(prometheus::PROMETHEUS).to_string()); APSARA_TEST_EQUAL("go_gc_duration_seconds_count 850", res.GetEvents()[6].Cast().GetContent(prometheus::PROMETHEUS).to_string()); + // httpResponse.GetBody()->mEventGroup = PipelineEventGroup(std::make_shared()); + PromMetricWriteCallback( + body2.data(), (size_t)1, (size_t)body2.length(), (void*)httpResponse.GetBody()); + httpResponse.GetBody()->FlushCache(); + APSARA_TEST_EQUAL(11UL, res.GetEvents().size()); + 
APSARA_TEST_EQUAL("go_goroutines 7", res.GetEvents()[7].Cast().GetContent(prometheus::PROMETHEUS).to_string()); APSARA_TEST_EQUAL("go_info{version=\"go1.22.3\"} 1", @@ -199,7 +234,7 @@ void ScrapeSchedulerUnittest::TestQueueIsFull() { UNIT_TEST_CASE(ScrapeSchedulerUnittest, TestInitscrapeScheduler) UNIT_TEST_CASE(ScrapeSchedulerUnittest, TestProcess) -UNIT_TEST_CASE(ScrapeSchedulerUnittest, TestSplitByLines) +UNIT_TEST_CASE(ScrapeSchedulerUnittest, TestStreamMetricWriteCallback) UNIT_TEST_CASE(ScrapeSchedulerUnittest, TestScheduler) UNIT_TEST_CASE(ScrapeSchedulerUnittest, TestQueueIsFull) From 390f941d509641fa9e63d0fa771d6e17c3d7a047 Mon Sep 17 00:00:00 2001 From: Zhu Shunjia Date: Wed, 13 Nov 2024 14:22:55 +0800 Subject: [PATCH 08/10] fix: prometheus flusher ut (#1876) use waitgroup to make sure that all request are handled. --- plugins/flusher/prometheus/flusher_prometheus_test.go | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/plugins/flusher/prometheus/flusher_prometheus_test.go b/plugins/flusher/prometheus/flusher_prometheus_test.go index 797352a7d4..98ae4cbad1 100644 --- a/plugins/flusher/prometheus/flusher_prometheus_test.go +++ b/plugins/flusher/prometheus/flusher_prometheus_test.go @@ -22,6 +22,7 @@ import ( "net/http" "sort" "strings" + "sync" "testing" "time" @@ -117,8 +118,10 @@ func TestPrometheusFlusher_ShouldWriteToRemoteStorageSuccess_GivenCorrectDataWit httpmock.Activate() defer httpmock.DeactivateAndReset() + var wg sync.WaitGroup httpmock.RegisterResponder("POST", endpoint, func(req *http.Request) (*http.Response, error) { + defer wg.Done() username, password, err := parseBasicAuth(req.Header.Get("Authorization")) if err != nil { return httpmock.NewStringResponse(http.StatusUnauthorized, "Invalid Authorization"), fmt.Errorf("invalid authentication: %w", err) @@ -238,10 +241,12 @@ func TestPrometheusFlusher_ShouldWriteToRemoteStorageSuccess_GivenCorrectDataWit } expectedWriteRequest = sortPromLabelsInWriteRequest(expectedWriteRequest) 
httpmock.ZeroCallCounters() + wg.Add(2) err := flusher.Export(groupEventsSlice, nil) So(err, ShouldBeNil) time.Sleep(1 * time.Second) // guarantee that all http requests are handled + wg.Wait() err = flusher.Stop() So(err, ShouldBeNil) From 6e7991dfe5987e90784eab407de73b2d2ee406c4 Mon Sep 17 00:00:00 2001 From: Takuka0311 <1914426213@qq.com> Date: Wed, 13 Nov 2024 14:30:22 +0800 Subject: [PATCH 09/10] Refactor Self-Monitor Directory Structure, Filenames, and Class Names (#1881) * init * polish --- core/CMakeLists.txt | 2 +- core/app_config/AppConfig.cpp | 6 +- core/application/Application.cpp | 16 +- core/checkpoint/AdhocCheckpointManager.cpp | 6 +- core/checkpoint/AdhocJobCheckpoint.cpp | 8 +- core/checkpoint/CheckPointManager.cpp | 24 +- core/checkpoint/CheckpointManagerV2.cpp | 4 +- core/common/EncodingConverter.cpp | 4 +- core/common/ParamExtractor.h | 2 +- core/common/compression/Compressor.h | 2 +- core/config/PipelineConfig.cpp | 2 +- core/config/watcher/ConfigWatcher.cpp | 6 +- core/ebpf/SelfMonitor.h | 2 +- core/ebpf/eBPFServer.h | 2 +- core/ebpf/handler/AbstractHandler.h | 4 +- core/file_server/ConfigManager.cpp | 18 +- core/file_server/EventDispatcher.cpp | 22 +- core/file_server/FileServer.cpp | 2 +- core/file_server/FileServer.h | 2 +- .../event_handler/EventHandler.cpp | 14 +- core/file_server/event_handler/LogInput.cpp | 4 +- .../event_listener/EventListener_Linux.cpp | 4 +- core/file_server/polling/PollingDirFile.cpp | 16 +- core/file_server/polling/PollingModify.cpp | 6 +- core/file_server/reader/LogFileReader.cpp | 42 ++-- core/go_pipeline/LogtailPlugin.cpp | 6 +- core/logtail_windows.cpp | 2 +- .../{LogtailAlarm.cpp => AlarmManager.cpp} | 40 +-- .../{LogtailAlarm.h => AlarmManager.h} | 24 +- core/monitor/MetricExportor.cpp | 2 +- .../{LogtailMetric.cpp => MetricManager.cpp} | 230 +----------------- core/monitor/MetricManager.h | 89 +++++++ core/monitor/MetricRecord.cpp | 182 ++++++++++++++ .../{LogtailMetric.h => MetricRecord.h} | 95 +------- 
...ngCollectorMetricTypes.h => MetricTypes.h} | 0 core/monitor/Monitor.cpp | 6 +- core/monitor/Monitor.h | 2 +- core/monitor/PluginMetricManager.cpp | 66 +++++ core/monitor/PluginMetricManager.h | 25 +- .../profile_sender/ProfileSender.cpp | 4 +- .../profile_sender/ProfileSender.h | 0 core/pipeline/Pipeline.cpp | 6 +- core/pipeline/Pipeline.h | 2 +- core/pipeline/PipelineContext.h | 6 +- core/pipeline/PipelineManager.cpp | 4 +- core/pipeline/batch/Batcher.h | 2 +- .../plugin/instance/ProcessorInstance.h | 2 +- core/pipeline/plugin/interface/Flusher.cpp | 4 +- core/pipeline/plugin/interface/Plugin.h | 2 +- core/pipeline/queue/QueueInterface.h | 2 +- core/pipeline/route/Router.h | 2 +- core/plugin/flusher/sls/DiskBufferWriter.cpp | 58 ++--- core/plugin/flusher/sls/FlusherSLS.cpp | 12 +- .../ProcessorParseDelimiterNative.cpp | 4 +- .../processor/ProcessorParseJsonNative.cpp | 8 +- .../ProcessorParseTimestampNative.cpp | 8 +- core/plugin/processor/ProcessorSPL.h | 2 +- .../ProcessorMergeMultilineLogNative.cpp | 2 +- .../ProcessorParseContainerLogNative.cpp | 4 +- ...ProcessorSplitMultilineLogStringNative.cpp | 2 +- core/prometheus/PromSelfMonitor.cpp | 2 +- core/prometheus/PromSelfMonitor.h | 2 +- core/prometheus/PrometheusInputRunner.h | 4 +- core/prometheus/schedulers/ScrapeScheduler.h | 2 +- core/provider/CMakeLists.txt | 2 +- core/provider/Provider.h | 2 +- core/runner/FlusherRunner.cpp | 2 +- core/runner/FlusherRunner.h | 2 +- core/runner/ProcessorRunner.cpp | 2 +- core/runner/ProcessorRunner.h | 2 +- core/runner/sink/http/HttpSink.h | 2 +- .../unittest/config/ConfigUpdatorUnittest.cpp | 2 +- .../input/InputNetworkObserverUnittest.cpp | 1 - core/unittest/monitor/CMakeLists.txt | 8 +- ...Unittest.cpp => MetricManagerUnittest.cpp} | 16 +- core/unittest/polling/PollingUnittest.cpp | 2 +- core/unittest/sender/SenderUnittest.cpp | 2 +- 77 files changed, 611 insertions(+), 569 deletions(-) rename core/monitor/{LogtailAlarm.cpp => AlarmManager.cpp} (92%) rename 
core/monitor/{LogtailAlarm.h => AlarmManager.h} (87%) rename core/monitor/{LogtailMetric.cpp => MetricManager.cpp} (61%) create mode 100644 core/monitor/MetricManager.h create mode 100644 core/monitor/MetricRecord.cpp rename core/monitor/{LogtailMetric.h => MetricRecord.h} (59%) rename core/monitor/{LoongCollectorMetricTypes.h => MetricTypes.h} (100%) rename core/{ => monitor}/profile_sender/ProfileSender.cpp (98%) rename core/{ => monitor}/profile_sender/ProfileSender.h (100%) rename core/unittest/monitor/{LogtailMetricUnittest.cpp => MetricManagerUnittest.cpp} (95%) diff --git a/core/CMakeLists.txt b/core/CMakeLists.txt index 9c9d7cffab..fe5810d906 100644 --- a/core/CMakeLists.txt +++ b/core/CMakeLists.txt @@ -113,7 +113,7 @@ include(${CMAKE_CURRENT_SOURCE_DIR}/plugin/flusher/links.cmake) # Subdirectories (modules). except for common, input, processor, flusher, observer, helper, spl, and provider. set(SUB_DIRECTORIES_LIST - application app_config checkpoint container_manager metadata logger go_pipeline monitor monitor/metric_constants profile_sender models + application app_config checkpoint container_manager metadata logger go_pipeline monitor monitor/metric_constants monitor/profile_sender models config config/watcher constants pipeline pipeline/batch pipeline/limiter pipeline/plugin pipeline/plugin/creator pipeline/plugin/instance pipeline/plugin/interface pipeline/queue pipeline/route pipeline/serializer runner runner/sink/http diff --git a/core/app_config/AppConfig.cpp b/core/app_config/AppConfig.cpp index 5610fab601..87319256c5 100644 --- a/core/app_config/AppConfig.cpp +++ b/core/app_config/AppConfig.cpp @@ -31,7 +31,7 @@ #include "json/value.h" #include "logger/Logger.h" #include "monitor/LogFileProfiler.h" -#include "monitor/LogtailAlarm.h" +#include "monitor/AlarmManager.h" #include "monitor/Monitor.h" #ifdef __ENTERPRISE__ #include "config/provider/EnterpriseConfigProvider.h" @@ -660,10 +660,10 @@ void AppConfig::loadAppConfigLogtailMode(const 
std::string& ilogtailConfigFile) confJson.clear(); if (res == CONFIG_NOT_EXIST) { LOG_ERROR(sLogger, ("can not find start config", ilogtailConfigFile)); - LogtailAlarm::GetInstance()->SendAlarm(LOGTAIL_CONFIG_ALARM, "can not find start config"); + AlarmManager::GetInstance()->SendAlarm(LOGTAIL_CONFIG_ALARM, "can not find start config"); } else if (res == CONFIG_INVALID_FORMAT) { LOG_ERROR(sLogger, ("start config is not valid json", ilogtailConfigFile)); - LogtailAlarm::GetInstance()->SendAlarm(LOGTAIL_CONFIG_ALARM, "start config is not valid json"); + AlarmManager::GetInstance()->SendAlarm(LOGTAIL_CONFIG_ALARM, "start config is not valid json"); } } } diff --git a/core/application/Application.cpp b/core/application/Application.cpp index 70d095c4a8..3fcd0fabeb 100644 --- a/core/application/Application.cpp +++ b/core/application/Application.cpp @@ -117,13 +117,13 @@ void Application::Init() { EnterpriseConfigProvider::GetInstance()->Init("enterprise"); EnterpriseConfigProvider::GetInstance()->LoadRegionConfig(); if (GlobalConf::Instance()->mStartWorkerStatus == "Crash") { - LogtailAlarm::GetInstance()->SendAlarm(LOGTAIL_CRASH_ALARM, "Logtail Restart"); + AlarmManager::GetInstance()->SendAlarm(LOGTAIL_CRASH_ALARM, "Logtail Restart"); } // get last crash info string backTraceStr = GetCrashBackTrace(); if (!backTraceStr.empty()) { LOG_ERROR(sLogger, ("last logtail crash stack", backTraceStr)); - LogtailAlarm::GetInstance()->SendAlarm(LOGTAIL_CRASH_STACK_ALARM, backTraceStr); + AlarmManager::GetInstance()->SendAlarm(LOGTAIL_CRASH_STACK_ALARM, backTraceStr); } if (BOOL_FLAG(ilogtail_disable_core)) { InitCrashBackTrace(); @@ -229,7 +229,7 @@ void Application::Start() { // GCOVR_EXCL_START InitRemoteConfigProviders(); #endif - LogtailAlarm::GetInstance()->Init(); + AlarmManager::GetInstance()->Init(); LoongCollectorMonitor::GetInstance()->Init(); LogtailMonitor::GetInstance()->Init(); @@ -368,7 +368,7 @@ void Application::Exit() { LogtailMonitor::GetInstance()->Stop(); 
LoongCollectorMonitor::GetInstance()->Stop(); - LogtailAlarm::GetInstance()->Stop(); + AlarmManager::GetInstance()->Stop(); LogtailPlugin::GetInstance()->StopBuiltInModules(); // from now on, alarm should not be used. @@ -391,9 +391,9 @@ void Application::CheckCriticalCondition(int32_t curTime) { // force to exit if config update thread is block more than 1 hour if (lastGetConfigTime > 0 && curTime - lastGetConfigTime > 3600) { LOG_ERROR(sLogger, ("last config get time is too old", lastGetConfigTime)("prepare force exit", "")); - LogtailAlarm::GetInstance()->SendAlarm( + AlarmManager::GetInstance()->SendAlarm( LOGTAIL_CRASH_ALARM, "last config get time is too old: " + ToString(lastGetConfigTime) + " force exit"); - LogtailAlarm::GetInstance()->ForceToSend(); + AlarmManager::GetInstance()->ForceToSend(); sleep(10); _exit(1); } @@ -402,9 +402,9 @@ void Application::CheckCriticalCondition(int32_t curTime) { // work around for no network when docker start if (BOOL_FLAG(send_prefer_real_ip) && !BOOL_FLAG(global_network_success) && curTime - mStartTime > 7200) { LOG_ERROR(sLogger, ("network is fail", "prepare force exit")); - LogtailAlarm::GetInstance()->SendAlarm(LOGTAIL_CRASH_ALARM, + AlarmManager::GetInstance()->SendAlarm(LOGTAIL_CRASH_ALARM, "network is fail since " + ToString(mStartTime) + " force exit"); - LogtailAlarm::GetInstance()->ForceToSend(); + AlarmManager::GetInstance()->ForceToSend(); sleep(10); _exit(1); } diff --git a/core/checkpoint/AdhocCheckpointManager.cpp b/core/checkpoint/AdhocCheckpointManager.cpp index 9b64c0f03d..9d51759b53 100644 --- a/core/checkpoint/AdhocCheckpointManager.cpp +++ b/core/checkpoint/AdhocCheckpointManager.cpp @@ -19,7 +19,7 @@ #include "common/FileSystemUtil.h" #include "common/Flags.h" #include "logger/Logger.h" -#include "monitor/LogtailAlarm.h" +#include "monitor/AlarmManager.h" #include "common/Thread.h" #include "common/HashUtil.h" @@ -149,7 +149,7 @@ void AdhocCheckpointManager::LoadAdhocCheckpoint() { std::vector 
jobList; if (!GetAllFiles(adhocCheckpointDir, "*", jobList)) { LOG_WARNING(sLogger, ("get all adhoc checkpoint files", "failed")); - LogtailAlarm::GetInstance()->SendAlarm(CHECKPOINT_ALARM, "Load adhoc check point files failed"); + AlarmManager::GetInstance()->SendAlarm(CHECKPOINT_ALARM, "Load adhoc check point files failed"); return; } @@ -161,7 +161,7 @@ void AdhocCheckpointManager::LoadAdhocCheckpoint() { } } else if (!Mkdir(adhocCheckpointDir)) { LOG_WARNING(sLogger, ("Create adhoc checkpoint dir", "failed")); - LogtailAlarm::GetInstance()->SendAlarm(CHECKPOINT_ALARM, "Create adhoc check point dir failed"); + AlarmManager::GetInstance()->SendAlarm(CHECKPOINT_ALARM, "Create adhoc check point dir failed"); } } diff --git a/core/checkpoint/AdhocJobCheckpoint.cpp b/core/checkpoint/AdhocJobCheckpoint.cpp index 86fd3f64b2..dbe9430730 100644 --- a/core/checkpoint/AdhocJobCheckpoint.cpp +++ b/core/checkpoint/AdhocJobCheckpoint.cpp @@ -18,7 +18,7 @@ #include "AdhocJobCheckpoint.h" #include "common/FileSystemUtil.h" #include "logger/Logger.h" -#include "monitor/LogtailAlarm.h" +#include "monitor/AlarmManager.h" namespace logtail { @@ -86,7 +86,7 @@ bool AdhocJobCheckpoint::Load(const std::string& path) { std::ifstream ifs(path); if (!ifs.is_open()) { LOG_ERROR(sLogger, ("open adhoc check point file error when load, file path", path)); - LogtailAlarm::GetInstance()->SendAlarm(CHECKPOINT_ALARM, "open check point file failed"); + AlarmManager::GetInstance()->SendAlarm(CHECKPOINT_ALARM, "open check point file failed"); return false; } @@ -157,7 +157,7 @@ void AdhocJobCheckpoint::Dump(const std::string& path, bool isAutoDump) { if (!Mkdirs(ParentPath(path))) { LOG_ERROR(sLogger, ("open adhoc check point file dir error when dump, file path", path)); - LogtailAlarm::GetInstance()->SendAlarm(CHECKPOINT_ALARM, "open adhoc check point file dir failed"); + AlarmManager::GetInstance()->SendAlarm(CHECKPOINT_ALARM, "open adhoc check point file dir failed"); return; } @@ -209,7 +209,7 
@@ void AdhocJobCheckpoint::Dump(const std::string& path, bool isAutoDump) { std::ofstream ofs(path); if (!ofs.is_open()) { LOG_ERROR(sLogger, ("open adhoc check point file error, file path", path)); - LogtailAlarm::GetInstance()->SendAlarm(CHECKPOINT_ALARM, "open adhoc check point file failed"); + AlarmManager::GetInstance()->SendAlarm(CHECKPOINT_ALARM, "open adhoc check point file failed"); return; } ofs << jsonString; diff --git a/core/checkpoint/CheckPointManager.cpp b/core/checkpoint/CheckPointManager.cpp index 6d69990782..5376e9326d 100644 --- a/core/checkpoint/CheckPointManager.cpp +++ b/core/checkpoint/CheckPointManager.cpp @@ -28,7 +28,7 @@ #include "file_server/ConfigManager.h" #include "file_server/FileDiscoveryOptions.h" #include "logger/Logger.h" -#include "monitor/LogtailAlarm.h" +#include "monitor/AlarmManager.h" using namespace std; DECLARE_FLAG_STRING(check_point_filename); @@ -134,7 +134,7 @@ void CheckPointManager::LoadCheckPoint() { LOG_ERROR(sLogger, ("load check point file fail, file content is not valid json", AppConfig::GetInstance()->GetCheckPointFilePath())); - LogtailAlarm::GetInstance()->SendAlarm(CHECKPOINT_ALARM, "content of check point file is not valid json"); + AlarmManager::GetInstance()->SendAlarm(CHECKPOINT_ALARM, "content of check point file is not valid json"); } return; } @@ -171,11 +171,11 @@ void CheckPointManager::LoadDirCheckPoint(const Json::Value& root) { } } catch (const exception& e) { LOG_ERROR(sLogger, ("failed to parse dir checkpoint", e.what())); - LogtailAlarm::GetInstance()->SendAlarm(CHECKPOINT_ALARM, + AlarmManager::GetInstance()->SendAlarm(CHECKPOINT_ALARM, "failed to parse dir checkpoint:" + string(e.what())); } catch (...) 
{ LOG_ERROR(sLogger, ("failed to parse dir checkpoint", "unknown exception")); - LogtailAlarm::GetInstance()->SendAlarm(CHECKPOINT_ALARM, + AlarmManager::GetInstance()->SendAlarm(CHECKPOINT_ALARM, "failed to parse dir checkpoint, unkonw exception"); } } @@ -301,11 +301,11 @@ void CheckPointManager::LoadFileCheckPoint(const Json::Value& root) { } } catch (const exception& e) { LOG_ERROR(sLogger, ("failed to parse file checkpoint", e.what())); - LogtailAlarm::GetInstance()->SendAlarm(CHECKPOINT_ALARM, + AlarmManager::GetInstance()->SendAlarm(CHECKPOINT_ALARM, "failed to parse file checkpoint:" + string(e.what())); } catch (...) { LOG_ERROR(sLogger, ("failed to parse file checkpoint", "unknown exception")); - LogtailAlarm::GetInstance()->SendAlarm(CHECKPOINT_ALARM, + AlarmManager::GetInstance()->SendAlarm(CHECKPOINT_ALARM, "failed to parse file checkpoint, unkonw exception"); } } @@ -317,7 +317,7 @@ bool CheckPointManager::DumpCheckPointToLocal() { if (!Mkdirs(ParentPath(checkPointFile))) { LOG_ERROR(sLogger, ("open check point file dir error", checkPointFile)); - LogtailAlarm::GetInstance()->SendAlarm(CHECKPOINT_ALARM, "open check point file dir failed"); + AlarmManager::GetInstance()->SendAlarm(CHECKPOINT_ALARM, "open check point file dir failed"); return false; } @@ -379,7 +379,7 @@ bool CheckPointManager::DumpCheckPointToLocal() { = leaf; } LOG_WARNING(sLogger, ("Too many check point", mDevInodeCheckPointPtrMap.size())); - LogtailAlarm::GetInstance()->SendAlarm(CHECKPOINT_ALARM, + AlarmManager::GetInstance()->SendAlarm(CHECKPOINT_ALARM, "Too many check point:" + ToString(mDevInodeCheckPointPtrMap.size())); } @@ -400,7 +400,7 @@ bool CheckPointManager::DumpCheckPointToLocal() { std::ofstream fout(checkPointTempFile.c_str()); if (!fout) { LOG_ERROR(sLogger, ("open check point file error", checkPointFile)); - LogtailAlarm::GetInstance()->SendAlarm(CHECKPOINT_ALARM, "open check point file failed"); + AlarmManager::GetInstance()->SendAlarm(CHECKPOINT_ALARM, "open check 
point file failed"); return false; } Json::Value result; @@ -410,7 +410,7 @@ bool CheckPointManager::DumpCheckPointToLocal() { fout << result.toStyledString(); if (!fout.good()) { LOG_ERROR(sLogger, ("dump check point to file failed", checkPointFile)); - LogtailAlarm::GetInstance()->SendAlarm(CHECKPOINT_ALARM, "dump check point to file failed"); + AlarmManager::GetInstance()->SendAlarm(CHECKPOINT_ALARM, "dump check point to file failed"); fout.close(); return false; } @@ -422,7 +422,7 @@ bool CheckPointManager::DumpCheckPointToLocal() { #endif if (rename(checkPointTempFile.c_str(), checkPointFile.c_str()) == -1) { LOG_ERROR(sLogger, ("rename check point file fail, errno", errno)); - LogtailAlarm::GetInstance()->SendAlarm(CHECKPOINT_ALARM, + AlarmManager::GetInstance()->SendAlarm(CHECKPOINT_ALARM, std::string("rename check point file fail, errno ") + ToString(errno)); return false; } @@ -494,7 +494,7 @@ boost::optional SearchFilePathByDevInodeInDirectory(const std::stri fsutil::Dir dir(dirPath); if (!dir.Open()) { - LogtailAlarm::GetInstance()->SendAlarm( + AlarmManager::GetInstance()->SendAlarm( CHECKPOINT_ALARM, string("Failed to open dir : ") + dirPath + ";\terrno : " + ToString(GetErrno())); LOG_ERROR(sLogger, METHOD_LOG_PATTERN("message", "open dir error")("dir", dirPath)); continue; diff --git a/core/checkpoint/CheckpointManagerV2.cpp b/core/checkpoint/CheckpointManagerV2.cpp index 76f58f4c76..2d24437314 100644 --- a/core/checkpoint/CheckpointManagerV2.cpp +++ b/core/checkpoint/CheckpointManagerV2.cpp @@ -19,7 +19,7 @@ #include "common/ScopeInvoker.h" #include "common/TimeUtil.h" #include "logger/Logger.h" -#include "monitor/LogtailAlarm.h" +#include "monitor/AlarmManager.h" #include "app_config/AppConfig.h" #include "checkpoint/CheckPointManager.h" @@ -45,7 +45,7 @@ namespace detail { std::string msg; msg.append("op:").append(op).append(", key:").append(key).append(", status:").append(s.ToString()); LOG_ERROR(sLogger, (title, msg)); - 
LogtailAlarm::GetInstance()->SendAlarm(CHECKPOINT_V2_ALARM, title + ", " + msg); + AlarmManager::GetInstance()->SendAlarm(CHECKPOINT_V2_ALARM, title + ", " + msg); } // Range key is represented by data pointer and size to avoid copy. diff --git a/core/common/EncodingConverter.cpp b/core/common/EncodingConverter.cpp index 6461e20679..d295aedc69 100644 --- a/core/common/EncodingConverter.cpp +++ b/core/common/EncodingConverter.cpp @@ -13,7 +13,7 @@ // limitations under the License. #include "EncodingConverter.h" -#include "LogtailAlarm.h" +#include "AlarmManager.h" #include "logger/Logger.h" #if defined(__linux__) #include @@ -78,7 +78,7 @@ size_t EncodingConverter::ConvertGbk2Utf8( if (ret == (size_t)(-1)) { LOG_ERROR(sLogger, ("convert GBK to UTF8 fail, errno", strerror(errno))); iconv(mGbk2Utf8Cd, NULL, NULL, NULL, NULL); // Clear status. - LogtailAlarm::GetInstance()->SendAlarm(ENCODING_CONVERT_ALARM, "convert GBK to UTF8 fail"); + AlarmManager::GetInstance()->SendAlarm(ENCODING_CONVERT_ALARM, "convert GBK to UTF8 fail"); // use memcpy memcpy(originDes + destIndex, originSrc + beginIndex, endIndex - beginIndex + 1); destIndex += endIndex - beginIndex + 1; diff --git a/core/common/ParamExtractor.h b/core/common/ParamExtractor.h index 7ee050e066..94432281ed 100644 --- a/core/common/ParamExtractor.h +++ b/core/common/ParamExtractor.h @@ -26,7 +26,7 @@ #include "common/StringTools.h" #include "logger/Logger.h" -#include "monitor/LogtailAlarm.h" +#include "monitor/AlarmManager.h" #define PARAM_ERROR_RETURN(logger, alarm, msg, module, config, project, logstore, region) \ if (module.empty()) { \ diff --git a/core/common/compression/Compressor.h b/core/common/compression/Compressor.h index 4fd7c0b020..991893e69f 100644 --- a/core/common/compression/Compressor.h +++ b/core/common/compression/Compressor.h @@ -18,7 +18,7 @@ #include -#include "monitor/LogtailMetric.h" +#include "monitor/MetricManager.h" #include "common/compression/CompressType.h" namespace logtail { diff 
--git a/core/config/PipelineConfig.cpp b/core/config/PipelineConfig.cpp index febe01986f..e80d3fae8b 100644 --- a/core/config/PipelineConfig.cpp +++ b/core/config/PipelineConfig.cpp @@ -103,7 +103,7 @@ bool PipelineConfig::Parse() { string key, errorMsg; const Json::Value* itr = nullptr; - LogtailAlarm& alarm = *LogtailAlarm::GetInstance(); + AlarmManager& alarm = *AlarmManager::GetInstance(); // to send alarm and init MetricsRecord, project, logstore and region should be extracted first. key = "flushers"; itr = mDetail->find(key.c_str(), key.c_str() + key.size()); diff --git a/core/config/watcher/ConfigWatcher.cpp b/core/config/watcher/ConfigWatcher.cpp index baa3d28a9f..2c430a60ed 100644 --- a/core/config/watcher/ConfigWatcher.cpp +++ b/core/config/watcher/ConfigWatcher.cpp @@ -91,7 +91,7 @@ PipelineConfigDiff ConfigWatcher::CheckConfigDiff() { PipelineConfig config(configName, std::move(detail)); if (!config.Parse()) { LOG_ERROR(sLogger, ("new config found but invalid", "skip current object")("config", configName)); - LogtailAlarm::GetInstance()->SendAlarm(CATEGORY_CONFIG_ALARM, + AlarmManager::GetInstance()->SendAlarm(CATEGORY_CONFIG_ALARM, "new config found but invalid: skip current object, config: " + configName, config.mProject, @@ -130,7 +130,7 @@ PipelineConfigDiff ConfigWatcher::CheckConfigDiff() { LOG_ERROR(sLogger, ("existing invalid config modified and remains invalid", "skip current object")("config", configName)); - LogtailAlarm::GetInstance()->SendAlarm( + AlarmManager::GetInstance()->SendAlarm( CATEGORY_CONFIG_ALARM, "existing invalid config modified and remains invalid: skip current object, config: " + configName, @@ -149,7 +149,7 @@ PipelineConfigDiff ConfigWatcher::CheckConfigDiff() { LOG_ERROR(sLogger, ("existing valid config modified and becomes invalid", "keep current pipeline running")("config", configName)); - LogtailAlarm::GetInstance()->SendAlarm( + AlarmManager::GetInstance()->SendAlarm( CATEGORY_CONFIG_ALARM, "existing valid config 
modified and becomes invalid: skip current object, config: " + configName, diff --git a/core/ebpf/SelfMonitor.h b/core/ebpf/SelfMonitor.h index 4b8551f1ef..48847f8229 100644 --- a/core/ebpf/SelfMonitor.h +++ b/core/ebpf/SelfMonitor.h @@ -21,7 +21,7 @@ #include "ebpf/include/export.h" #include "monitor/PluginMetricManager.h" #include "common/Lock.h" -#include "monitor/LoongCollectorMetricTypes.h" +#include "monitor/MetricTypes.h" #include "monitor/metric_constants/MetricConstants.h" namespace logtail { diff --git a/core/ebpf/eBPFServer.h b/core/ebpf/eBPFServer.h index 9141a9ec9a..24f79347ea 100644 --- a/core/ebpf/eBPFServer.h +++ b/core/ebpf/eBPFServer.h @@ -29,7 +29,7 @@ #include "ebpf/handler/AbstractHandler.h" #include "ebpf/handler/ObserveHandler.h" #include "ebpf/handler/SecurityHandler.h" -#include "monitor/LoongCollectorMetricTypes.h" +#include "monitor/MetricTypes.h" #include "ebpf/SelfMonitor.h" namespace logtail { diff --git a/core/ebpf/handler/AbstractHandler.h b/core/ebpf/handler/AbstractHandler.h index 0df2b539df..b71924be33 100644 --- a/core/ebpf/handler/AbstractHandler.h +++ b/core/ebpf/handler/AbstractHandler.h @@ -17,8 +17,8 @@ #include #include "pipeline/PipelineContext.h" -#include "monitor/LoongCollectorMetricTypes.h" -#include "monitor/LogtailMetric.h" +#include "monitor/MetricTypes.h" +#include "monitor/MetricManager.h" namespace logtail{ namespace ebpf { diff --git a/core/file_server/ConfigManager.cpp b/core/file_server/ConfigManager.cpp index d339e9ca5f..3ae6c241f9 100644 --- a/core/file_server/ConfigManager.cpp +++ b/core/file_server/ConfigManager.cpp @@ -50,7 +50,7 @@ #include "file_server/event_handler/EventHandler.h" #include "file_server/FileServer.h" #include "monitor/LogFileProfiler.h" -#include "monitor/LogtailAlarm.h" +#include "monitor/AlarmManager.h" #include "pipeline/Pipeline.h" #include "pipeline/PipelineManager.h" @@ -153,7 +153,7 @@ bool ConfigManager::RegisterHandlersRecursively(const std::string& path, fsutil::Dir dir(path); 
if (!dir.Open()) { auto err = GetErrno(); - LogtailAlarm::GetInstance()->SendAlarm(LOGDIR_PERMINSSION_ALARM, + AlarmManager::GetInstance()->SendAlarm(LOGDIR_PERMINSSION_ALARM, string("Failed to open dir : ") + path + ";\terrno : " + ToString(err), config.second->GetProjectName(), config.second->GetLogstoreName(), @@ -319,7 +319,7 @@ void ConfigManager::RegisterWildcardPath(const FileDiscoveryConfig& config, cons fsutil::Dir dir(path); if (!dir.Open()) { auto err = GetErrno(); - LogtailAlarm::GetInstance()->SendAlarm(LOGDIR_PERMINSSION_ALARM, + AlarmManager::GetInstance()->SendAlarm(LOGDIR_PERMINSSION_ALARM, string("Failed to open dir : ") + path + ";\terrno : " + ToString(err), config.second->GetProjectName(), config.second->GetLogstoreName(), @@ -334,7 +334,7 @@ void ConfigManager::RegisterWildcardPath(const FileDiscoveryConfig& config, cons LOG_WARNING(sLogger, ("too many sub directoried for path", path)("dirCount", dirCount)("basePath", config.first->GetBasePath())); - LogtailAlarm::GetInstance()->SendAlarm(STAT_LIMIT_ALARM, + AlarmManager::GetInstance()->SendAlarm(STAT_LIMIT_ALARM, string("too many sub directoried for path:" + path + " dirCount: " + ToString(dirCount) + " basePath" + config.first->GetBasePath()), @@ -471,7 +471,7 @@ bool ConfigManager::RegisterHandlersWithinDepth(const std::string& path, const F fsutil::Dir dir(path); if (!dir.Open()) { int err = GetErrno(); - LogtailAlarm::GetInstance()->SendAlarm(LOGDIR_PERMINSSION_ALARM, + AlarmManager::GetInstance()->SendAlarm(LOGDIR_PERMINSSION_ALARM, string("Failed to open dir : ") + path + ";\terrno : " + ToString(err), config.second->GetProjectName(), config.second->GetLogstoreName(), @@ -508,7 +508,7 @@ bool ConfigManager::RegisterDescendants(const string& path, const FileDiscoveryC fsutil::Dir dir(path); if (!dir.Open()) { auto err = GetErrno(); - LogtailAlarm::GetInstance()->SendAlarm(LOGDIR_PERMINSSION_ALARM, + AlarmManager::GetInstance()->SendAlarm(LOGDIR_PERMINSSION_ALARM, string("Failed to open 
dir : ") + path + ";\terrno : " + ToString(err), config.second->GetProjectName(), config.second->GetLogstoreName(), @@ -615,7 +615,7 @@ FileDiscoveryConfig ConfigManager::FindBestMatch(const string& path, const strin ("file", path + '/' + name)("include in multi config", logNameList)("best", prevMatch.second->GetConfigName())); for (auto iter = multiConfigs.begin(); iter != multiConfigs.end(); ++iter) { - LogtailAlarm::GetInstance()->SendAlarm( + AlarmManager::GetInstance()->SendAlarm( MULTI_CONFIG_MATCH_ALARM, path + '/' + name + " include in multi config, best and oldest match: " + prevMatch.second->GetProjectName() + ',' + prevMatch.second->GetLogstoreName() + ',' @@ -759,7 +759,7 @@ int32_t ConfigManager::FindMatchWithForceFlag(std::vector& ("file", path + '/' + name)("include in multi config", logNameList)("best", prevMatch.second->GetConfigName())); for (auto iter = multiConfigs.begin(); iter != multiConfigs.end(); ++iter) { - LogtailAlarm::GetInstance()->SendAlarm( + AlarmManager::GetInstance()->SendAlarm( MULTI_CONFIG_MATCH_ALARM, path + '/' + name + " include in multi config, best and oldest match: " + prevMatch.second->GetProjectName() + ',' + prevMatch.second->GetLogstoreName() + ',' @@ -798,7 +798,7 @@ void ConfigManager::SendAllMatchAlarm(const string& path, ("file", path + '/' + name)("include in too many configs", allConfig.size())( "max multi config size", maxMultiConfigSize)("allconfigs", allConfigNames)); for (auto iter = allConfig.begin(); iter != allConfig.end(); ++iter) - LogtailAlarm::GetInstance()->SendAlarm( + AlarmManager::GetInstance()->SendAlarm( TOO_MANY_CONFIG_ALARM, path + '/' + name + " include in too many configs:" + ToString(allConfig.size()) + ", max multi config size : " + ToString(maxMultiConfigSize) + ", allmatch: " + allConfigNames, diff --git a/core/file_server/EventDispatcher.cpp b/core/file_server/EventDispatcher.cpp index dbbd80cd11..1b03c06211 100644 --- a/core/file_server/EventDispatcher.cpp +++ 
b/core/file_server/EventDispatcher.cpp @@ -46,7 +46,7 @@ #include "file_server/polling/PollingDirFile.h" #include "file_server/polling/PollingModify.h" #include "monitor/LogFileProfiler.h" -#include "monitor/LogtailAlarm.h" +#include "monitor/AlarmManager.h" #include "monitor/MetricExportor.h" #include "protobuf/sls/metric.pb.h" #include "protobuf/sls/sls_logs.pb.h" @@ -102,7 +102,7 @@ EventDispatcher::EventDispatcher() : mWatchNum(0), mInotifyWatchNum(0) { mEventListener = EventListener::GetInstance(); if (!AppConfig::GetInstance()->NoInotify()) { if (!mEventListener->Init()) { - LogtailAlarm::GetInstance()->SendAlarm(EPOLL_ERROR_ALARM, + AlarmManager::GetInstance()->SendAlarm(EPOLL_ERROR_ALARM, string("faild to init inotify fd, errno:") + ToString(GetErrno())); LOG_ERROR(sLogger, ("faild to init inotify fd, errno:", errno)); } @@ -161,7 +161,7 @@ bool EventDispatcher::RegisterEventHandler(const char* path, if (!fsutil::PathStat::stat(path, statBuf)) { if (errno != EEXIST) { LOG_WARNING(sLogger, ("call stat() on path fail", path)("errno", errno)); - LogtailAlarm::GetInstance()->SendAlarm(REGISTER_INOTIFY_FAIL_ALARM, + AlarmManager::GetInstance()->SendAlarm(REGISTER_INOTIFY_FAIL_ALARM, "call stat() on path fail" + string(path) + ", errno: " + ToString(errno) + ", will not be monitored", config.second->GetProjectName(), @@ -215,7 +215,7 @@ bool EventDispatcher::RegisterEventHandler(const char* path, if (mWatchNum >= INT32_FLAG(max_watch_dir_count)) { LOG_WARNING(sLogger, ("fail to monitor dir, max_watch_dir_count", INT32_FLAG(max_watch_dir_count))("dir", path)); - LogtailAlarm::GetInstance()->SendAlarm(DIR_EXCEED_LIMIT_ALARM, + AlarmManager::GetInstance()->SendAlarm(DIR_EXCEED_LIMIT_ALARM, string("dir: ") + path + " will not monitored, dir count should less than " + ToString(INT32_FLAG(max_watch_dir_count)), @@ -230,7 +230,7 @@ bool EventDispatcher::RegisterEventHandler(const char* path, LOG_INFO(sLogger, ("failed to add inotify watcher for dir", path)("max allowd 
inotify watchers", INT32_FLAG(default_max_inotify_watch_num))); - LogtailAlarm::GetInstance()->SendAlarm(INOTIFY_DIR_NUM_LIMIT_ALARM, + AlarmManager::GetInstance()->SendAlarm(INOTIFY_DIR_NUM_LIMIT_ALARM, string("failed to register inotify watcher for dir") + path); } else { // need check mEventListener valid @@ -245,21 +245,21 @@ bool EventDispatcher::RegisterEventHandler(const char* path, LOG_ERROR(sLogger, ("failed to register dir", path)("errno", errno)("error", str)("force exit", "wait 10 seconds.")); - LogtailAlarm::GetInstance()->SendAlarm(LOGTAIL_CRASH_ALARM, + AlarmManager::GetInstance()->SendAlarm(LOGTAIL_CRASH_ALARM, string("Failed to register dir: ") + path + ", errno: " + ToString(errno) + ", error: " + str + ", force exit"); - LogtailAlarm::GetInstance()->ForceToSend(); + AlarmManager::GetInstance()->ForceToSend(); sleep(10); _exit(1); } #endif if (config.first->IsTimeout(path)) - LogtailAlarm::GetInstance()->SendAlarm(REGISTER_INOTIFY_FAIL_ALARM, + AlarmManager::GetInstance()->SendAlarm(REGISTER_INOTIFY_FAIL_ALARM, string("Failed to register dir: ") + path + ", reason: " + str + ", project: " + config.second->GetProjectName() + ", logstore: " + config.second->GetLogstoreName()); else - LogtailAlarm::GetInstance()->SendAlarm(REGISTER_INOTIFY_FAIL_ALARM, + AlarmManager::GetInstance()->SendAlarm(REGISTER_INOTIFY_FAIL_ALARM, string("Failed to register dir: ") + path + ", reason: " + str + ", no timeout"); } else { @@ -551,7 +551,7 @@ EventDispatcher::ValidateCheckpointResult EventDispatcher::validateCheckpoint( ("delete checkpoint", "cannot find the file because of full find cache")("config", checkpoint->mConfigName)( "log reader queue name", checkpoint->mFileName)("real file path", checkpoint->mRealFileName)( "file device", checkpoint->mDevInode.inode)("file inode", checkpoint->mDevInode.inode)); - LogtailAlarm::GetInstance()->SendAlarm( + AlarmManager::GetInstance()->SendAlarm( CHECKPOINT_ALARM, string("cannot find the file because of full find cache, 
delete the checkpoint, log reader queue name: ") + filePath + ", real file path: " + realFilePath); @@ -921,7 +921,7 @@ void EventDispatcher::PropagateTimeout(const char* path) { MapType::Type::iterator pathpos = mPathWdMap.find(tmp); if (pathpos == mPathWdMap.end()) { // walkarond of bug#5760293, should find the scenarios - LogtailAlarm::GetInstance()->SendAlarm( + AlarmManager::GetInstance()->SendAlarm( INVALID_MEMORY_ACCESS_ALARM, "PropagateTimeout access invalid key of mPathWdMap, path : " + string(tmp)); LOG_ERROR(sLogger, ("PropagateTimeout access invalid key of mPathWdMap, path", string(tmp))); free(tmp); diff --git a/core/file_server/FileServer.cpp b/core/file_server/FileServer.cpp index 0741efefca..511f202e9a 100644 --- a/core/file_server/FileServer.cpp +++ b/core/file_server/FileServer.cpp @@ -79,7 +79,7 @@ void FileServer::PauseInner() { LogInput::GetInstance()->HoldOn(); auto holdOnCost = GetCurrentTimeInMilliSeconds() - holdOnStart; if (holdOnCost >= 60 * 1000) { - LogtailAlarm::GetInstance()->SendAlarm(HOLD_ON_TOO_SLOW_ALARM, + AlarmManager::GetInstance()->SendAlarm(HOLD_ON_TOO_SLOW_ALARM, "Pausing file server took " + ToString(holdOnCost) + "ms"); } LOG_INFO(sLogger, ("file server pause", "succeeded")("cost", ToString(holdOnCost) + "ms")); diff --git a/core/file_server/FileServer.h b/core/file_server/FileServer.h index d77b6a13a1..ec539dabdf 100644 --- a/core/file_server/FileServer.h +++ b/core/file_server/FileServer.h @@ -24,7 +24,7 @@ #include "file_server/FileDiscoveryOptions.h" #include "file_server/MultilineOptions.h" #include "file_server/reader/FileReaderOptions.h" -#include "monitor/LogtailMetric.h" +#include "monitor/MetricManager.h" #include "monitor/PluginMetricManager.h" #include "pipeline/PipelineContext.h" diff --git a/core/file_server/event_handler/EventHandler.cpp b/core/file_server/event_handler/EventHandler.cpp index 4c9774b373..e3bf580d78 100644 --- a/core/file_server/event_handler/EventHandler.cpp +++ 
b/core/file_server/event_handler/EventHandler.cpp @@ -29,7 +29,7 @@ #include "file_server/event/BlockEventManager.h" #include "file_server/event_handler/LogInput.h" #include "logger/Logger.h" -#include "monitor/LogtailAlarm.h" +#include "monitor/AlarmManager.h" #include "pipeline/queue/ProcessQueueManager.h" #include "runner/ProcessorRunner.h" @@ -82,7 +82,7 @@ void NormalEventHandler::Handle(const Event& event) { mCreateHandlerPtr->Handle(event); } else if (!buf.IsRegFile()) { LOG_INFO(sLogger, ("path is not file or directory, ignore it", fullPath)("stat mode", buf.GetMode())); - LogtailAlarm::GetInstance()->SendAlarm(UNEXPECTED_FILE_TYPE_MODE_ALARM, + AlarmManager::GetInstance()->SendAlarm(UNEXPECTED_FILE_TYPE_MODE_ALARM, string("found unexpected type mode: ") + ToString(buf.GetMode()) + ", file path: " + fullPath); return; @@ -231,7 +231,7 @@ void CreateModifyHandler::Handle(const Event& event) { isDir = true; else if (!buf.IsRegFile()) { LOG_INFO(sLogger, ("path is not file or directory, ignore it", path)("stat mode", buf.GetMode())); - LogtailAlarm::GetInstance()->SendAlarm(UNEXPECTED_FILE_TYPE_MODE_ALARM, + AlarmManager::GetInstance()->SendAlarm(UNEXPECTED_FILE_TYPE_MODE_ALARM, std::string("found unexpected type mode: ") + ToString(buf.GetMode()) + ", file path: " + path); return; @@ -336,7 +336,7 @@ void ModifyHandler::MakeSpaceForNewReader() { "total log reader count exceeds upper limit")("reader count after clean", mDevInodeReaderMap.size())); // randomly choose one project to send alarm LogFileReaderPtr oneReader = mDevInodeReaderMap.begin()->second; - LogtailAlarm::GetInstance()->SendAlarm( + AlarmManager::GetInstance()->SendAlarm( FILE_READER_EXCEED_ALARM, string("total log reader count exceeds upper limit, delete some of the old readers, reader count after clean:") + ToString(mDevInodeReaderMap.size()), @@ -386,7 +386,7 @@ LogFileReaderPtr ModifyHandler::CreateLogFileReaderPtr(const string& path, "logstore", 
readerConfig.second->GetLogstoreName())("config", readerConfig.second->GetConfigName())( "log reader queue name", PathJoin(path, name))("max queue length", readerConfig.first->mRotatorQueueSize)); - LogtailAlarm::GetInstance()->SendAlarm( + AlarmManager::GetInstance()->SendAlarm( DROP_LOG_ALARM, string("log reader queue length excceeds upper limit, stop creating new reader, config: ") + readerConfig.second->GetConfigName() + ", log reader queue name: " + PathJoin(path, name) @@ -708,7 +708,7 @@ void ModifyHandler::Handle(const Event& event) { ToString(reader->GetDevInode().inode), reader->GetLastFilePos())("DevInode map size", mDevInodeReaderMap.size())); recreateReaderFlag = true; - LogtailAlarm::GetInstance()->SendAlarm( + AlarmManager::GetInstance()->SendAlarm( INNER_PROFILE_ALARM, string("file dev inode changed, create new reader. new path:") + reader->GetHostLogPath() + " ,project:" + reader->GetProject() + " ,logstore:" + reader->GetLogstore()); @@ -761,7 +761,7 @@ void ModifyHandler::Handle(const Event& event) { ("logprocess queue is full, put modify event to event queue again", reader->GetHostLogPath())(reader->GetProject(), reader->GetLogstore())); - LogtailAlarm::GetInstance()->SendAlarm( + AlarmManager::GetInstance()->SendAlarm( PROCESS_QUEUE_BUSY_ALARM, string("logprocess queue is full, put modify event to event queue again, file:") + reader->GetHostLogPath(), diff --git a/core/file_server/event_handler/LogInput.cpp b/core/file_server/event_handler/LogInput.cpp index 299e660c68..aef3726614 100644 --- a/core/file_server/event_handler/LogInput.cpp +++ b/core/file_server/event_handler/LogInput.cpp @@ -37,7 +37,7 @@ #include "file_server/reader/GloablFileDescriptorManager.h" #include "file_server/reader/LogFileReader.h" #include "logger/Logger.h" -#include "monitor/LogtailAlarm.h" +#include "monitor/AlarmManager.h" #include "monitor/Monitor.h" #ifdef __ENTERPRISE__ #include "config/provider/EnterpriseConfigProvider.h" @@ -279,7 +279,7 @@ bool 
LogInput::ReadLocalEvents() { sLogger, ("process local event, dir", source)("file name", object)("config", configName)( "project", readerConfig.second->GetProjectName())("logstore", readerConfig.second->GetLogstoreName())); - LogtailAlarm::GetInstance()->SendAlarm(LOAD_LOCAL_EVENT_ALARM, + AlarmManager::GetInstance()->SendAlarm(LOAD_LOCAL_EVENT_ALARM, string("process local event, dir:") + source + ", file name:" + object + ", config:" + configName + ", file count:" + ToString(objList.size()), diff --git a/core/file_server/event_listener/EventListener_Linux.cpp b/core/file_server/event_listener/EventListener_Linux.cpp index 849aa7616d..634fc27a40 100644 --- a/core/file_server/event_listener/EventListener_Linux.cpp +++ b/core/file_server/event_listener/EventListener_Linux.cpp @@ -17,7 +17,7 @@ #include #include #include "logger/Logger.h" -#include "monitor/LogtailAlarm.h" +#include "monitor/AlarmManager.h" #include "common/ErrorUtil.h" #include "common/Flags.h" #include "file_server/EventDispatcher.h" @@ -101,7 +101,7 @@ int32_t logtail::EventListener::ReadEvents(std::vector& eventVe EventType etype = 0; if (event->mask & IN_Q_OVERFLOW) { LOG_INFO(sLogger, ("inotify event queue overflow", "miss inotify events")); - LogtailAlarm::GetInstance()->SendAlarm(INOTIFY_EVENT_OVERFLOW_ALARM, "inotify event queue overflow"); + AlarmManager::GetInstance()->SendAlarm(INOTIFY_EVENT_OVERFLOW_ALARM, "inotify event queue overflow"); } else { etype |= event->mask & IN_DELETE_SELF ? EVENT_TIMEOUT : 0; etype |= event->mask & IN_CREATE ? 
EVENT_CREATE : 0; diff --git a/core/file_server/polling/PollingDirFile.cpp b/core/file_server/polling/PollingDirFile.cpp index 22c0866c1e..92deb2cad3 100644 --- a/core/file_server/polling/PollingDirFile.cpp +++ b/core/file_server/polling/PollingDirFile.cpp @@ -33,7 +33,7 @@ #include "file_server/polling/PollingEventQueue.h" #include "file_server/polling/PollingModify.h" #include "logger/Logger.h" -#include "monitor/LogtailAlarm.h" +#include "monitor/AlarmManager.h" #include "monitor/metric_constants/MetricConstants.h" // Control the check frequency to call ClearUnavailableFileAndDir. @@ -118,7 +118,7 @@ void PollingDirFile::CheckConfigPollingStatCount(const int32_t lastStatCount, LOG_WARNING(sLogger, (msgBase, diffCount)(config.first->GetBasePath(), mStatCount)(config.second->GetProjectName(), config.second->GetLogstoreName())); - LogtailAlarm::GetInstance()->SendAlarm(STAT_LIMIT_ALARM, + AlarmManager::GetInstance()->SendAlarm(STAT_LIMIT_ALARM, msgBase + ", current count: " + ToString(diffCount) + " total count:" + ToString(mStatCount) + " path: " + config.first->GetBasePath(), config.second->GetProjectName(), @@ -382,7 +382,7 @@ bool PollingDirFile::PollingNormalConfigPath(const FileDiscoveryConfig& pConfig, LOG_DEBUG(sLogger, ("Open dir error, ENOENT, dir", dirPath.c_str())); return false; } else { - LogtailAlarm::GetInstance()->SendAlarm(LOGDIR_PERMINSSION_ALARM, + AlarmManager::GetInstance()->SendAlarm(LOGDIR_PERMINSSION_ALARM, string("Failed to open dir : ") + dirPath + ";\terrno : " + ToString(err), pConfig.second->GetProjectName(), @@ -405,7 +405,7 @@ bool PollingDirFile::PollingNormalConfigPath(const FileDiscoveryConfig& pConfig, LOG_WARNING(sLogger, ("total dir's polling stat count is exceeded", nowStatCount)(dirPath, mStatCount)( pConfig.second->GetProjectName(), pConfig.second->GetLogstoreName())); - LogtailAlarm::GetInstance()->SendAlarm( + AlarmManager::GetInstance()->SendAlarm( STAT_LIMIT_ALARM, string("total dir's polling stat count is exceeded, now 
count:") + ToString(nowStatCount) + " total count:" + ToString(mStatCount) + " path: " + dirPath + " project:" @@ -417,7 +417,7 @@ bool PollingDirFile::PollingNormalConfigPath(const FileDiscoveryConfig& pConfig, LOG_WARNING(sLogger, ("this dir's polling stat count is exceeded", nowStatCount)(dirPath, mStatCount)( pConfig.second->GetProjectName(), pConfig.second->GetLogstoreName())); - LogtailAlarm::GetInstance()->SendAlarm( + AlarmManager::GetInstance()->SendAlarm( STAT_LIMIT_ALARM, string("this dir's polling stat count is exceeded, now count:") + ToString(nowStatCount) + " total count:" + ToString(mStatCount) + " path: " + dirPath @@ -547,7 +547,7 @@ bool PollingDirFile::PollingWildcardConfigPath(const FileDiscoveryConfig& pConfi LOG_DEBUG(sLogger, ("Open dir fail, ENOENT, dir", dirPath.c_str())); return false; } else { - LogtailAlarm::GetInstance()->SendAlarm(LOGDIR_PERMINSSION_ALARM, + AlarmManager::GetInstance()->SendAlarm(LOGDIR_PERMINSSION_ALARM, string("Failed to open dir : ") + dirPath + ";\terrno : " + ToString(err), pConfig.second->GetProjectName(), @@ -566,7 +566,7 @@ bool PollingDirFile::PollingWildcardConfigPath(const FileDiscoveryConfig& pConfi LOG_WARNING(sLogger, ("too many sub directoried for path", dirPath)("dirCount", dirCount)("basePath", pConfig.first->GetBasePath())); - LogtailAlarm::GetInstance()->SendAlarm(STAT_LIMIT_ALARM, + AlarmManager::GetInstance()->SendAlarm(STAT_LIMIT_ALARM, string("too many sub directoried for path:" + dirPath + " dirCount: " + ToString(dirCount) + " basePath" + pConfig.first->GetBasePath()), @@ -583,7 +583,7 @@ bool PollingDirFile::PollingWildcardConfigPath(const FileDiscoveryConfig& pConfi LOG_WARNING(sLogger, ("total dir's polling stat count is exceeded", "")(dirPath, mStatCount)(pConfig.second->GetProjectName(), pConfig.second->GetLogstoreName())); - LogtailAlarm::GetInstance()->SendAlarm( + AlarmManager::GetInstance()->SendAlarm( STAT_LIMIT_ALARM, string("total dir's polling stat count is exceeded, total count:" 
+ ToString(mStatCount) + " path: " + dirPath + " project:" + pConfig.second->GetProjectName() diff --git a/core/file_server/polling/PollingModify.cpp b/core/file_server/polling/PollingModify.cpp index 0a940cd4a3..839ce4c1d6 100644 --- a/core/file_server/polling/PollingModify.cpp +++ b/core/file_server/polling/PollingModify.cpp @@ -27,7 +27,7 @@ #include "file_server/FileServer.h" #include "file_server/event/Event.h" #include "logger/Logger.h" -#include "monitor/LogtailAlarm.h" +#include "monitor/AlarmManager.h" #include "monitor/metric_constants/MetricConstants.h" using namespace std; @@ -108,7 +108,7 @@ void PollingModify::MakeSpaceForNewFile() { return; } - LogtailAlarm::GetInstance()->SendAlarm(MODIFY_FILE_EXCEED_ALARM, + AlarmManager::GetInstance()->SendAlarm(MODIFY_FILE_EXCEED_ALARM, string("modify cache is up limit, delete old cache, modify file count:") + ToString(mModifyCacheMap.size()) + " delete count : " + ToString(removeCount)); @@ -156,7 +156,7 @@ void PollingModify::LoadFileNameInQueues() { if (mModifyCacheMap.size() >= (size_t)INT32_FLAG(modify_cache_max)) { LOG_ERROR(sLogger, ("total modify polling stat count is exceeded, drop event", newFileNameQueue.size())); - LogtailAlarm::GetInstance()->SendAlarm(MODIFY_FILE_EXCEED_ALARM, + AlarmManager::GetInstance()->SendAlarm(MODIFY_FILE_EXCEED_ALARM, string("total modify polling stat count is exceeded, drop event:") + ToString(newFileNameQueue.size())); hasSpace = false; diff --git a/core/file_server/reader/LogFileReader.cpp b/core/file_server/reader/LogFileReader.cpp index b61d14560d..62c9a49c6f 100644 --- a/core/file_server/reader/LogFileReader.cpp +++ b/core/file_server/reader/LogFileReader.cpp @@ -47,7 +47,7 @@ #include "file_server/reader/JsonLogFileReader.h" #include "logger/Logger.h" #include "monitor/LogFileProfiler.h" -#include "monitor/LogtailAlarm.h" +#include "monitor/AlarmManager.h" #include "monitor/metric_constants/MetricConstants.h" #include "pipeline/queue/ExactlyOnceQueueManager.h" 
#include "pipeline/queue/ProcessQueueManager.h" @@ -643,7 +643,7 @@ bool LogFileReader::CheckForFirstOpen(FileReadPolicy policy) { return true; else { LOG_ERROR(sLogger, ("open log file fail", mHostLogPath)("errno", ErrnoToString(error))); - LogtailAlarm::GetInstance()->SendAlarm(OPEN_LOGFILE_FAIL_ALARM, + AlarmManager::GetInstance()->SendAlarm(OPEN_LOGFILE_FAIL_ALARM, string("Failed to open log file: ") + mHostLogPath + "; errono:" + ErrnoToString(error), GetProject(), @@ -830,7 +830,7 @@ std::string LogFileReader::GetTopicName(const std::string& topicConfig, const st ("extract topic by regex", "fail")("project", GetProject())("logstore", GetLogstore())( "path", finalPath)("regx", topicConfig)); - LogtailAlarm::GetInstance()->SendAlarm(CATEGORY_CONFIG_ALARM, + AlarmManager::GetInstance()->SendAlarm(CATEGORY_CONFIG_ALARM, string("extract topic by regex fail, exception:") + exception + ", path:" + finalPath + ", regex:" + topicConfig, GetProject(), @@ -841,7 +841,7 @@ std::string LogFileReader::GetTopicName(const std::string& topicConfig, const st LOG_ERROR(sLogger, ("extract topic by regex", "fail")("exception", exception)("project", GetProject())( "logstore", GetLogstore())("path", finalPath)("regx", topicConfig)); - LogtailAlarm::GetInstance()->SendAlarm(CATEGORY_CONFIG_ALARM, + AlarmManager::GetInstance()->SendAlarm(CATEGORY_CONFIG_ALARM, string("extract topic by regex fail, exception:") + exception + ", path:" + finalPath + ", regex:" + topicConfig, GetProject(), @@ -994,7 +994,7 @@ void LogFileReader::OnOpenFileError() { "log path", mRealLogPath)("file device", ToString(mDevInode.dev))( "file inode", ToString(mDevInode.inode))("file signature", mLastFileSignatureHash)( "file signature size", mLastFileSignatureSize)("last file position", mLastFilePos)); - LogtailAlarm::GetInstance()->SendAlarm(LOGFILE_PERMINSSION_ALARM, + AlarmManager::GetInstance()->SendAlarm(LOGFILE_PERMINSSION_ALARM, string("Failed to open log file because of permission: ") + mHostLogPath, 
GetProject(), @@ -1008,7 +1008,7 @@ void LogFileReader::OnOpenFileError() { "file device", ToString(mDevInode.dev))("file inode", ToString(mDevInode.inode))( "file signature", mLastFileSignatureHash)("file signature size", mLastFileSignatureSize)( "last file position", mLastFilePos)); - LogtailAlarm::GetInstance()->SendAlarm(OPEN_LOGFILE_FAIL_ALARM, + AlarmManager::GetInstance()->SendAlarm(OPEN_LOGFILE_FAIL_ALARM, string("Failed to open log file because of : Too many open files") + mHostLogPath, GetProject(), @@ -1022,7 +1022,7 @@ void LogFileReader::OnOpenFileError() { "file device", ToString(mDevInode.dev))("file inode", ToString(mDevInode.inode))( "file signature", mLastFileSignatureHash)("file signature size", mLastFileSignatureSize)( "last file position", mLastFilePos)); - LogtailAlarm::GetInstance()->SendAlarm(OPEN_LOGFILE_FAIL_ALARM, + AlarmManager::GetInstance()->SendAlarm(OPEN_LOGFILE_FAIL_ALARM, string("Failed to open log file: ") + mHostLogPath + "; errono:" + ErrnoToString(GetErrno()), GetProject(), @@ -1054,7 +1054,7 @@ bool LogFileReader::UpdateFilePtr() { "file device", ToString(mDevInode.dev))("file inode", ToString(mDevInode.inode))( "file signature", mLastFileSignatureHash)("file signature size", mLastFileSignatureSize)( "last file position", mLastFilePos)); - LogtailAlarm::GetInstance()->SendAlarm(OPEN_FILE_LIMIT_ALARM, + AlarmManager::GetInstance()->SendAlarm(OPEN_FILE_LIMIT_ALARM, string("Failed to open log file: ") + mHostLogPath + " limit:" + ToString(INT32_FLAG(max_reader_open_files)), GetProject(), @@ -1198,7 +1198,7 @@ void LogFileReader::CloseFilePtr() { "file device", ToString(mDevInode.dev))("file inode", ToString(mDevInode.inode))( "file signature", mLastFileSignatureHash)("file signature size", mLastFileSignatureSize)( "file size", mLastFileSize)("last file position", mLastFilePos)("reader id", long(this))); - LogtailAlarm::GetInstance()->SendAlarm(OPEN_LOGFILE_FAIL_ALARM, + 
AlarmManager::GetInstance()->SendAlarm(OPEN_LOGFILE_FAIL_ALARM, string("close file error because of ") + strerror(errno) + ", file path: " + mHostLogPath + ", inode: " + ToString(mDevInode.inode) + ", inode: " + ToString(fd), @@ -1259,7 +1259,7 @@ bool LogFileReader::CheckFileSignatureAndOffset(bool isOpenOnUpdate) { sLogger, ("tell error", mHostLogPath)("inode", mDevInode.inode)("error", strerror(lastErrNo))("reopen", reopenFlag)( "project", GetProject())("logstore", GetLogstore())("config", GetConfigName())); - LogtailAlarm::GetInstance()->SendAlarm(OPEN_LOGFILE_FAIL_ALARM, + AlarmManager::GetInstance()->SendAlarm(OPEN_LOGFILE_FAIL_ALARM, string("tell error because of ") + strerror(lastErrNo) + " file path: " + mHostLogPath + ", inode : " + ToString(mDevInode.inode), GetProject(), @@ -1312,7 +1312,7 @@ bool LogFileReader::CheckFileSignatureAndOffset(bool isOpenOnUpdate) { mHostLogPath)(ToString(endSize), ToString(mLastFilePos))("project", GetProject())( "logstore", GetLogstore())("config", GetConfigName())); - LogtailAlarm::GetInstance()->SendAlarm(LOG_TRUNCATE_ALARM, + AlarmManager::GetInstance()->SendAlarm(LOG_TRUNCATE_ALARM, mHostLogPath + " signature is same but size decrease, read from now fileSize " + ToString(endSize) + " last read pos " + ToString(mLastFilePos), @@ -1390,12 +1390,12 @@ bool LogFileReader::ParseLogTime(const char* buffer, } } if (AppConfig::GetInstance()->IsLogParseAlarmValid()) { - if (LogtailAlarm::GetInstance()->IsLowLevelAlarmValid()) { + if (AlarmManager::GetInstance()->IsLowLevelAlarmValid()) { LOG_ERROR(sLogger, ("parse regex log fail, exception", exception)("buffer", buffer)("project", project)("logstore", logStore)("file", logPath)); } - LogtailAlarm::GetInstance()->SendAlarm( + AlarmManager::GetInstance()->SendAlarm( REGEX_MATCH_ALARM, "parse regex log fail:" + exception, project, logStore, region); } return false; @@ -1415,12 +1415,12 @@ bool LogFileReader::GetLogTimeByOffset(const char* buffer, int nanosecondLength = 0; if 
(strptime_ns(buffer + pos, timeFormat.c_str(), &t, &nanosecond, &nanosecondLength) == NULL) { if (AppConfig::GetInstance()->IsLogParseAlarmValid()) { - if (LogtailAlarm::GetInstance()->IsLowLevelAlarmValid()) { + if (AlarmManager::GetInstance()->IsLowLevelAlarmValid()) { LOG_WARNING(sLogger, ("get time by offset fail, region", region)("project", project)("logstore", logStore)("file", logPath)); } - LogtailAlarm::GetInstance()->SendAlarm( + AlarmManager::GetInstance()->SendAlarm( PARSE_TIME_FAIL_ALARM, "errorlog:" + string(buffer), project, logStore, region); } return false; @@ -1482,7 +1482,7 @@ bool LogFileReader::GetRawData(LogBuffer& logBuffer, int64_t fileSize, bool tryR LOG_WARNING(sLogger, ("read log delay", mHostLogPath)("fall behind bytes", delta)("file size", fileSize)("read pos", mLastFilePos)); - LogtailAlarm::GetInstance()->SendAlarm( + AlarmManager::GetInstance()->SendAlarm( READ_LOG_DELAY_ALARM, std::string("fall behind ") + ToString(delta) + " bytes, file size:" + ToString(fileSize) + ", now position:" + ToString(mLastFilePos) + ", path:" + mHostLogPath @@ -1501,7 +1501,7 @@ bool LogFileReader::GetRawData(LogBuffer& logBuffer, int64_t fileSize, bool tryR ("read log delay and force set file pos to file size", mHostLogPath)("fall behind bytes", delta)( "skip bytes config", mReaderConfig.first->mReadDelaySkipThresholdBytes)("file size", fileSize)( "read pos", mLastFilePos)); - LogtailAlarm::GetInstance()->SendAlarm( + AlarmManager::GetInstance()->SendAlarm( READ_LOG_DELAY_ALARM, string("force set file pos to file size, fall behind ") + ToString(delta) + " bytes, file size:" + ToString(fileSize) + ", now position:" + ToString(mLastFilePos) @@ -1535,7 +1535,7 @@ bool LogFileReader::GetRawData(LogBuffer& logBuffer, int64_t fileSize, bool tryR LOG_WARNING(sLogger, ("read stopped container file", mHostLogPath)("stopped time", mContainerStoppedTime)( "file size", fileSize)("read pos", mLastFilePos)); - LogtailAlarm::GetInstance()->SendAlarm( + 
AlarmManager::GetInstance()->SendAlarm( READ_STOPPED_CONTAINER_ALARM, string("path: ") + mHostLogPath + ", stopped time:" + ToString(mContainerStoppedTime) + ", file size:" + ToString(fileSize) + ", now position:" + ToString(mLastFilePos), @@ -1671,7 +1671,7 @@ void LogFileReader::ReadUTF8(LogBuffer& logBuffer, int64_t end, bool& moreData, oss << "Log is too long and forced to be split at offset: " << ToString(mLastFilePos + nbytes) << " file: " << mHostLogPath << " inode: " << ToString(mDevInode.inode) << " first 1024B log: " << logBuffer.rawBuffer.substr(0, 1024) << std::endl; - LogtailAlarm::GetInstance()->SendAlarm( + AlarmManager::GetInstance()->SendAlarm( SPLIT_LOG_FAIL_ALARM, oss.str(), GetProject(), GetLogstore(), GetRegion()); } else { // line is not finished yet nor more data, put all data in cache @@ -1876,7 +1876,7 @@ void LogFileReader::ReadGBK(LogBuffer& logBuffer, int64_t end, bool& moreData, b oss << "Log is too long and forced to be split at offset: " << ToString(mLastFilePos) << " file: " << mHostLogPath << " inode: " << ToString(mDevInode.inode) << " first 1024B log: " << logBuffer.rawBuffer.substr(0, 1024) << std::endl; - LogtailAlarm::GetInstance()->SendAlarm( + AlarmManager::GetInstance()->SendAlarm( SPLIT_LOG_FAIL_ALARM, oss.str(), GetProject(), GetLogstore(), GetRegion()); } LOG_DEBUG(sLogger, @@ -1905,7 +1905,7 @@ LogFileReader::ReadFile(LogFileOperator& op, void* buf, size_t size, int64_t& of // LOG_INFO(sLogger, // ("read fuse file with a hole, size", // offset - oriOffset)("filename", mHostLogPath)("dev", mDevInode.dev)("inode", mDevInode.inode)); - // LogtailAlarm::GetInstance()->SendAlarm( + // AlarmManager::GetInstance()->SendAlarm( // FUSE_FILE_TRUNCATE_ALARM, // string("read fuse file with a hole, size: ") + ToString(offset - oriOffset) + " filename: " // + mHostLogPath + " dev: " + ToString(mDevInode.dev) + " inode: " + ToString(mDevInode.inode), diff --git a/core/go_pipeline/LogtailPlugin.cpp b/core/go_pipeline/LogtailPlugin.cpp 
index 609242ce64..993c050798 100644 --- a/core/go_pipeline/LogtailPlugin.cpp +++ b/core/go_pipeline/LogtailPlugin.cpp @@ -27,7 +27,7 @@ #include "file_server/ConfigManager.h" #include "logger/Logger.h" #include "monitor/LogFileProfiler.h" -#include "monitor/LogtailAlarm.h" +#include "monitor/AlarmManager.h" #include "pipeline/PipelineManager.h" #include "pipeline/queue/SenderQueueManager.h" #include "provider/Provider.h" @@ -135,7 +135,7 @@ void LogtailPlugin::StopAllPipelines(bool withInputFlag) { auto stopAllCost = GetCurrentTimeInMilliSeconds() - stopAllStart; LOG_INFO(sLogger, ("Go pipelines stop all", "succeeded")("cost", ToString(stopAllCost) + "ms")); if (stopAllCost >= 10 * 1000) { - LogtailAlarm::GetInstance()->SendAlarm(HOLD_ON_TOO_SLOW_ALARM, + AlarmManager::GetInstance()->SendAlarm(HOLD_ON_TOO_SLOW_ALARM, "Stopping all Go pipelines took " + ToString(stopAllCost) + "ms"); } } @@ -152,7 +152,7 @@ void LogtailPlugin::Stop(const std::string& configName, bool removedFlag) { auto stopCost = GetCurrentTimeInMilliSeconds() - stopStart; LOG_INFO(sLogger, ("Go pipelines stop", "succeeded")("config", configName)("cost", ToString(stopCost) + "ms")); if (stopCost >= 10 * 1000) { - LogtailAlarm::GetInstance()->SendAlarm( + AlarmManager::GetInstance()->SendAlarm( HOLD_ON_TOO_SLOW_ALARM, "Stopping Go pipeline " + configName + " took " + ToString(stopCost) + "ms"); } } diff --git a/core/logtail_windows.cpp b/core/logtail_windows.cpp index 6ff1bf28e6..2e28f38637 100644 --- a/core/logtail_windows.cpp +++ b/core/logtail_windows.cpp @@ -20,7 +20,7 @@ #include "application/Application.h" #include "common/Flags.h" #include "logger/Logger.h" -#include "monitor/LogtailAlarm.h" +#include "monitor/AlarmManager.h" using namespace logtail; diff --git a/core/monitor/LogtailAlarm.cpp b/core/monitor/AlarmManager.cpp similarity index 92% rename from core/monitor/LogtailAlarm.cpp rename to core/monitor/AlarmManager.cpp index 0abf2a3ebd..d4f0edfbf6 100644 --- 
a/core/monitor/LogtailAlarm.cpp +++ b/core/monitor/AlarmManager.cpp @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "monitor/LogtailAlarm.h" +#include "monitor/AlarmManager.h" #include "LogFileProfiler.h" #include "app_config/AppConfig.h" @@ -38,7 +38,7 @@ namespace logtail { const string ALARM_SLS_LOGSTORE_NAME = "logtail_alarm"; -LogtailAlarm::LogtailAlarm() { +AlarmManager::AlarmManager() { mMessageType.resize(ALL_LOGTAIL_ALARM_NUM); mMessageType[USER_CONFIG_ALARM] = "USER_CONFIG_ALARM"; mMessageType[GLOBAL_CONFIG_ALARM] = "GLOBAL_CONFIG_ALARM"; @@ -106,11 +106,11 @@ LogtailAlarm::LogtailAlarm() { mMessageType[RELABEL_METRIC_FAIL_ALARM] = "RELABEL_METRIC_FAIL_ALARM"; } -void LogtailAlarm::Init() { - mThreadRes = async(launch::async, &LogtailAlarm::SendAlarmLoop, this); +void AlarmManager::Init() { + mThreadRes = async(launch::async, &AlarmManager::SendAlarmLoop, this); } -void LogtailAlarm::Stop() { +void AlarmManager::Stop() { ForceToSend(); { lock_guard lock(mThreadRunningMux); @@ -125,7 +125,7 @@ void LogtailAlarm::Stop() { } } -bool LogtailAlarm::SendAlarmLoop() { +bool AlarmManager::SendAlarmLoop() { LOG_INFO(sLogger, ("alarm gathering", "started")); { unique_lock lock(mThreadRunningMux); @@ -140,8 +140,8 @@ bool LogtailAlarm::SendAlarmLoop() { return true; } -void LogtailAlarm::SendAllRegionAlarm() { - LogtailAlarmMessage* messagePtr = nullptr; +void AlarmManager::SendAllRegionAlarm() { + AlarmMessage* messagePtr = nullptr; int32_t currentTime = time(nullptr); size_t sendRegionIndex = 0; size_t sendAlarmTypeIndex = 0; @@ -161,7 +161,7 @@ void LogtailAlarm::SendAllRegionAlarm() { } region = allAlarmIter->first; // LOG_DEBUG(sLogger, ("1Send Alarm", region)("region", sendRegionIndex)); - LogtailAlarmVector& alarmBufferVec = *(allAlarmIter->second.first); + AlarmVector& alarmBufferVec = *(allAlarmIter->second.first); std::vector& lastUpdateTimeVec = 
allAlarmIter->second.second; // check this region end if (sendAlarmTypeIndex >= alarmBufferVec.size()) { @@ -186,7 +186,7 @@ void LogtailAlarm::SendAllRegionAlarm() { // LOG_DEBUG(sLogger, ("3Send Alarm", region)("region", sendRegionIndex)("alarm index", // mMessageType[sendAlarmTypeIndex])); - map& alarmMap = alarmBufferVec[sendAlarmTypeIndex]; + map& alarmMap = alarmBufferVec[sendAlarmTypeIndex]; if (alarmMap.size() == 0 || currentTime - lastUpdateTimeVec[sendAlarmTypeIndex] < INT32_FLAG(logtail_alarm_interval)) { // go next alarm type @@ -220,7 +220,7 @@ void LogtailAlarm::SendAllRegionAlarm() { logGroup.set_source(LogFileProfiler::mIpAddr); logGroup.set_category(ALARM_SLS_LOGSTORE_NAME); auto now = GetCurrentLogtailTime(); - for (map::iterator mapIter = alarmMap.begin(); mapIter != alarmMap.end(); + for (map::iterator mapIter = alarmMap.begin(); mapIter != alarmMap.end(); ++mapIter) { messagePtr = mapIter->second; @@ -280,12 +280,12 @@ void LogtailAlarm::SendAllRegionAlarm() { } while (true); } -LogtailAlarm::LogtailAlarmVector* LogtailAlarm::MakesureLogtailAlarmMapVecUnlocked(const string& region) { +AlarmManager::AlarmVector* AlarmManager::MakesureLogtailAlarmMapVecUnlocked(const string& region) { // @todo // string region; auto iter = mAllAlarmMap.find(region); if (iter == mAllAlarmMap.end()) { - auto pMapVec = std::make_shared(); + auto pMapVec = std::make_shared(); // need resize to init this obj pMapVec->resize(ALL_LOGTAIL_ALARM_NUM); @@ -300,7 +300,7 @@ LogtailAlarm::LogtailAlarmVector* LogtailAlarm::MakesureLogtailAlarmMapVecUnlock return iter->second.first.get(); } -void LogtailAlarm::SendAlarm(const LogtailAlarmType alarmType, +void AlarmManager::SendAlarm(const AlarmType alarmType, const std::string& message, const std::string& projectName, const std::string& category, @@ -317,20 +317,20 @@ void LogtailAlarm::SendAlarm(const LogtailAlarmType alarmType, // mMessageType[alarmType])("msg", message)); std::lock_guard lock(mAlarmBufferMutex); string key = 
projectName + "_" + category; - LogtailAlarmVector& alarmBufferVec = *MakesureLogtailAlarmMapVecUnlocked(region); + AlarmVector& alarmBufferVec = *MakesureLogtailAlarmMapVecUnlocked(region); if (alarmBufferVec[alarmType].find(key) == alarmBufferVec[alarmType].end()) { - LogtailAlarmMessage* messagePtr - = new LogtailAlarmMessage(mMessageType[alarmType], projectName, category, message, 1); - alarmBufferVec[alarmType].insert(pair(key, messagePtr)); + AlarmMessage* messagePtr + = new AlarmMessage(mMessageType[alarmType], projectName, category, message, 1); + alarmBufferVec[alarmType].insert(pair(key, messagePtr)); } else alarmBufferVec[alarmType][key]->IncCount(); } -void LogtailAlarm::ForceToSend() { +void AlarmManager::ForceToSend() { INT32_FLAG(logtail_alarm_interval) = 0; } -bool LogtailAlarm::IsLowLevelAlarmValid() { +bool AlarmManager::IsLowLevelAlarmValid() { int32_t curTime = time(NULL); if (curTime == mLastLowLevelTime) { if (++mLastLowLevelCount > INT32_FLAG(logtail_low_level_alarm_speed)) { diff --git a/core/monitor/LogtailAlarm.h b/core/monitor/AlarmManager.h similarity index 87% rename from core/monitor/LogtailAlarm.h rename to core/monitor/AlarmManager.h index bd375c8f1c..b8c248c2e5 100644 --- a/core/monitor/LogtailAlarm.h +++ b/core/monitor/AlarmManager.h @@ -30,7 +30,7 @@ namespace logtail { -enum LogtailAlarmType { +enum AlarmType { USER_CONFIG_ALARM = 0, GLOBAL_CONFIG_ALARM = 1, DOMAIN_SOCKET_BIND_ALARM = 2, @@ -98,14 +98,14 @@ enum LogtailAlarmType { ALL_LOGTAIL_ALARM_NUM = 68 }; -struct LogtailAlarmMessage { +struct AlarmMessage { std::string mMessageType; std::string mProjectName; std::string mCategory; std::string mMessage; int32_t mCount; - LogtailAlarmMessage(const std::string& type, + AlarmMessage(const std::string& type, const std::string& projectName, const std::string& category, const std::string& message, @@ -114,17 +114,17 @@ struct LogtailAlarmMessage { void IncCount(int32_t inc = 1) { mCount += inc; } }; -class LogtailAlarm { +class 
AlarmManager { public: - static LogtailAlarm* GetInstance() { - static LogtailAlarm instance; + static AlarmManager* GetInstance() { + static AlarmManager instance; return &instance; } void Init(); void Stop(); - void SendAlarm(const LogtailAlarmType alarmType, + void SendAlarm(const AlarmType alarmType, const std::string& message, const std::string& projectName = "", const std::string& category = "", @@ -134,14 +134,14 @@ class LogtailAlarm { bool IsLowLevelAlarmValid(); private: - typedef std::vector > LogtailAlarmVector; + typedef std::vector > AlarmVector; - LogtailAlarm(); - ~LogtailAlarm() = default; + AlarmManager(); + ~AlarmManager() = default; bool SendAlarmLoop(); // without lock - LogtailAlarmVector* MakesureLogtailAlarmMapVecUnlocked(const std::string& region); + AlarmVector* MakesureLogtailAlarmMapVecUnlocked(const std::string& region); void SendAllRegionAlarm(); std::future mThreadRes; @@ -151,7 +151,7 @@ class LogtailAlarm { std::vector mMessageType; - std::map, std::vector > > mAllAlarmMap; + std::map, std::vector > > mAllAlarmMap; PTMutex mAlarmBufferMutex; std::atomic_int mLastLowLevelTime{0}; diff --git a/core/monitor/MetricExportor.cpp b/core/monitor/MetricExportor.cpp index 99c829d8d1..2440fe2106 100644 --- a/core/monitor/MetricExportor.cpp +++ b/core/monitor/MetricExportor.cpp @@ -17,7 +17,7 @@ #include #include "LogFileProfiler.h" -#include "LogtailMetric.h" +#include "MetricManager.h" #include "MetricConstants.h" #include "app_config/AppConfig.h" #include "common/FileSystemUtil.h" diff --git a/core/monitor/LogtailMetric.cpp b/core/monitor/MetricManager.cpp similarity index 61% rename from core/monitor/LogtailMetric.cpp rename to core/monitor/MetricManager.cpp index 239f6f8c9c..4b0dada653 100644 --- a/core/monitor/LogtailMetric.cpp +++ b/core/monitor/MetricManager.cpp @@ -12,9 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include "LogtailMetric.h" +#include "MetricManager.h" -#include "MetricConstants.h" #include "app_config/AppConfig.h" #include "common/StringTools.h" #include "common/TimeUtil.h" @@ -25,233 +24,6 @@ using namespace sls_logs; namespace logtail { -const std::string METRIC_KEY_LABEL = "label"; -const std::string METRIC_KEY_VALUE = "value"; -const std::string METRIC_KEY_CATEGORY = "category"; -const std::string MetricCategory::METRIC_CATEGORY_UNKNOWN = "unknown"; -const std::string MetricCategory::METRIC_CATEGORY_AGENT = "agent"; -const std::string MetricCategory::METRIC_CATEGORY_RUNNER = "runner"; -const std::string MetricCategory::METRIC_CATEGORY_PIPELINE = "pipeline"; -const std::string MetricCategory::METRIC_CATEGORY_COMPONENT = "component"; -const std::string MetricCategory::METRIC_CATEGORY_PLUGIN = "plugin"; -const std::string MetricCategory::METRIC_CATEGORY_PLUGIN_SOURCE = "plugin_source"; - -MetricsRecord::MetricsRecord(const std::string& category, MetricLabelsPtr labels, DynamicMetricLabelsPtr dynamicLabels) - : mCategory(category), mLabels(labels), mDynamicLabels(dynamicLabels), mDeleted(false) { -} - -CounterPtr MetricsRecord::CreateCounter(const std::string& name) { - CounterPtr counterPtr = std::make_shared(name); - mCounters.emplace_back(counterPtr); - return counterPtr; -} - -TimeCounterPtr MetricsRecord::CreateTimeCounter(const std::string& name) { - TimeCounterPtr counterPtr = std::make_shared(name); - mTimeCounters.emplace_back(counterPtr); - return counterPtr; -} - -IntGaugePtr MetricsRecord::CreateIntGauge(const std::string& name) { - IntGaugePtr gaugePtr = std::make_shared(name); - mIntGauges.emplace_back(gaugePtr); - return gaugePtr; -} - -DoubleGaugePtr MetricsRecord::CreateDoubleGauge(const std::string& name) { - DoubleGaugePtr gaugePtr = std::make_shared>(name); - mDoubleGauges.emplace_back(gaugePtr); - return gaugePtr; -} - -void MetricsRecord::MarkDeleted() { - mDeleted = true; -} - -bool MetricsRecord::IsDeleted() const { - return 
mDeleted; -} - -const std::string& MetricsRecord::GetCategory() const { - return mCategory; -} - -const MetricLabelsPtr& MetricsRecord::GetLabels() const { - return mLabels; -} - -const DynamicMetricLabelsPtr& MetricsRecord::GetDynamicLabels() const { - return mDynamicLabels; -} - -const std::vector& MetricsRecord::GetCounters() const { - return mCounters; -} - -const std::vector& MetricsRecord::GetTimeCounters() const { - return mTimeCounters; -} - -const std::vector& MetricsRecord::GetIntGauges() const { - return mIntGauges; -} - -const std::vector& MetricsRecord::GetDoubleGauges() const { - return mDoubleGauges; -} - -MetricsRecord* MetricsRecord::Collect() { - MetricsRecord* metrics = new MetricsRecord(mCategory, mLabels, mDynamicLabels); - for (auto& item : mCounters) { - CounterPtr newPtr(item->Collect()); - metrics->mCounters.emplace_back(newPtr); - } - for (auto& item : mTimeCounters) { - TimeCounterPtr newPtr(item->Collect()); - metrics->mTimeCounters.emplace_back(newPtr); - } - for (auto& item : mIntGauges) { - IntGaugePtr newPtr(item->Collect()); - metrics->mIntGauges.emplace_back(newPtr); - } - for (auto& item : mDoubleGauges) { - DoubleGaugePtr newPtr(item->Collect()); - metrics->mDoubleGauges.emplace_back(newPtr); - } - return metrics; -} - -MetricsRecord* MetricsRecord::GetNext() const { - return mNext; -} - -void MetricsRecord::SetNext(MetricsRecord* next) { - mNext = next; -} - -MetricsRecordRef::~MetricsRecordRef() { - if (mMetrics) { - mMetrics->MarkDeleted(); - } -} - -void MetricsRecordRef::SetMetricsRecord(MetricsRecord* metricRecord) { - mMetrics = metricRecord; -} - -const std::string& MetricsRecordRef::GetCategory() const { - return mMetrics->GetCategory(); -} - -const MetricLabelsPtr& MetricsRecordRef::GetLabels() const { - return mMetrics->GetLabels(); -} - -const DynamicMetricLabelsPtr& MetricsRecordRef::GetDynamicLabels() const { - return mMetrics->GetDynamicLabels(); -} - -CounterPtr MetricsRecordRef::CreateCounter(const std::string& 
name) { - return mMetrics->CreateCounter(name); -} - -TimeCounterPtr MetricsRecordRef::CreateTimeCounter(const std::string& name) { - return mMetrics->CreateTimeCounter(name); -} - -IntGaugePtr MetricsRecordRef::CreateIntGauge(const std::string& name) { - return mMetrics->CreateIntGauge(name); -} - -DoubleGaugePtr MetricsRecordRef::CreateDoubleGauge(const std::string& name) { - return mMetrics->CreateDoubleGauge(name); -} - -const MetricsRecord* MetricsRecordRef::operator->() const { - return mMetrics; -} - -void MetricsRecordRef::AddLabels(MetricLabels&& labels) { - mMetrics->GetLabels()->insert(mMetrics->GetLabels()->end(), labels.begin(), labels.end()); -} - -#ifdef APSARA_UNIT_TEST_MAIN -bool MetricsRecordRef::HasLabel(const std::string& key, const std::string& value) const { - for (auto item : *(mMetrics->GetLabels())) { - if (item.first == key && item.second == value) { - return true; - } - } - return false; -} -#endif - -// ReentrantMetricsRecord相关操作可以无锁,因为mCounters、mGauges只在初始化时会添加内容,后续只允许Get操作 -void ReentrantMetricsRecord::Init(const std::string& category, - MetricLabels& labels, - DynamicMetricLabels& dynamicLabels, - std::unordered_map& metricKeys) { - WriteMetrics::GetInstance()->PrepareMetricsRecordRef( - mMetricsRecordRef, category, std::move(labels), std::move(dynamicLabels)); - for (auto metric : metricKeys) { - switch (metric.second) { - case MetricType::METRIC_TYPE_COUNTER: - mCounters[metric.first] = mMetricsRecordRef.CreateCounter(metric.first); - break; - case MetricType::METRIC_TYPE_TIME_COUNTER: - mTimeCounters[metric.first] = mMetricsRecordRef.CreateTimeCounter(metric.first); - case MetricType::METRIC_TYPE_INT_GAUGE: - mIntGauges[metric.first] = mMetricsRecordRef.CreateIntGauge(metric.first); - break; - case MetricType::METRIC_TYPE_DOUBLE_GAUGE: - mDoubleGauges[metric.first] = mMetricsRecordRef.CreateDoubleGauge(metric.first); - break; - default: - break; - } - } -} - -const MetricLabelsPtr& ReentrantMetricsRecord::GetLabels() const { - 
return mMetricsRecordRef->GetLabels(); -} - -const DynamicMetricLabelsPtr& ReentrantMetricsRecord::GetDynamicLabels() const { - return mMetricsRecordRef->GetDynamicLabels(); -} - -CounterPtr ReentrantMetricsRecord::GetCounter(const std::string& name) { - auto it = mCounters.find(name); - if (it != mCounters.end()) { - return it->second; - } - return nullptr; -} - -TimeCounterPtr ReentrantMetricsRecord::GetTimeCounter(const std::string& name) { - auto it = mTimeCounters.find(name); - if (it != mTimeCounters.end()) { - return it->second; - } - return nullptr; -} - -IntGaugePtr ReentrantMetricsRecord::GetIntGauge(const std::string& name) { - auto it = mIntGauges.find(name); - if (it != mIntGauges.end()) { - return it->second; - } - return nullptr; -} - -DoubleGaugePtr ReentrantMetricsRecord::GetDoubleGauge(const std::string& name) { - auto it = mDoubleGauges.find(name); - if (it != mDoubleGauges.end()) { - return it->second; - } - return nullptr; -} - WriteMetrics::~WriteMetrics() { Clear(); } diff --git a/core/monitor/MetricManager.h b/core/monitor/MetricManager.h new file mode 100644 index 0000000000..1d54b1cfce --- /dev/null +++ b/core/monitor/MetricManager.h @@ -0,0 +1,89 @@ +/* + * Copyright 2023 iLogtail Authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include +#include +#include +#include +#include + +#include "MetricRecord.h" +#include "common/Lock.h" +#include "protobuf/sls/sls_logs.pb.h" + +namespace logtail { + +class WriteMetrics { +private: + WriteMetrics() = default; + std::mutex mMutex; + MetricsRecord* mHead = nullptr; + + void Clear(); + MetricsRecord* GetHead(); + +public: + ~WriteMetrics(); + static WriteMetrics* GetInstance() { + static WriteMetrics* ptr = new WriteMetrics(); + return ptr; + } + + void PrepareMetricsRecordRef(MetricsRecordRef& ref, + const std::string& category, + MetricLabels&& labels, + DynamicMetricLabels&& dynamicLabels = {}); + void CreateMetricsRecordRef(MetricsRecordRef& ref, + const std::string& category, + MetricLabels&& labels, + DynamicMetricLabels&& dynamicLabels = {}); + void CommitMetricsRecordRef(MetricsRecordRef& ref); + MetricsRecord* DoSnapshot(); + + +#ifdef APSARA_UNIT_TEST_MAIN + friend class MetricManagerUnittest; +#endif +}; + +class ReadMetrics { +private: + ReadMetrics() = default; + mutable ReadWriteLock mReadWriteLock; + MetricsRecord* mHead = nullptr; + void Clear(); + MetricsRecord* GetHead(); + +public: + ~ReadMetrics(); + static ReadMetrics* GetInstance() { + static ReadMetrics* ptr = new ReadMetrics(); + return ptr; + } + void ReadAsLogGroup(const std::string& regionFieldName, + const std::string& defaultRegion, + std::map& logGroupMap) const; + void ReadAsFileBuffer(std::string& metricsContent) const; + void UpdateMetrics(); + +#ifdef APSARA_UNIT_TEST_MAIN + friend class MetricManagerUnittest; +#endif +}; + +} // namespace logtail diff --git a/core/monitor/MetricRecord.cpp b/core/monitor/MetricRecord.cpp new file mode 100644 index 0000000000..8e27533db5 --- /dev/null +++ b/core/monitor/MetricRecord.cpp @@ -0,0 +1,182 @@ +/* + * Copyright 2024 iLogtail Authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "MetricRecord.h" + +namespace logtail { + +const std::string METRIC_KEY_LABEL = "label"; +const std::string METRIC_KEY_VALUE = "value"; +const std::string METRIC_KEY_CATEGORY = "category"; +const std::string MetricCategory::METRIC_CATEGORY_UNKNOWN = "unknown"; +const std::string MetricCategory::METRIC_CATEGORY_AGENT = "agent"; +const std::string MetricCategory::METRIC_CATEGORY_RUNNER = "runner"; +const std::string MetricCategory::METRIC_CATEGORY_PIPELINE = "pipeline"; +const std::string MetricCategory::METRIC_CATEGORY_COMPONENT = "component"; +const std::string MetricCategory::METRIC_CATEGORY_PLUGIN = "plugin"; +const std::string MetricCategory::METRIC_CATEGORY_PLUGIN_SOURCE = "plugin_source"; + +MetricsRecord::MetricsRecord(const std::string& category, MetricLabelsPtr labels, DynamicMetricLabelsPtr dynamicLabels) + : mCategory(category), mLabels(labels), mDynamicLabels(dynamicLabels), mDeleted(false) { +} + +CounterPtr MetricsRecord::CreateCounter(const std::string& name) { + CounterPtr counterPtr = std::make_shared(name); + mCounters.emplace_back(counterPtr); + return counterPtr; +} + +TimeCounterPtr MetricsRecord::CreateTimeCounter(const std::string& name) { + TimeCounterPtr counterPtr = std::make_shared(name); + mTimeCounters.emplace_back(counterPtr); + return counterPtr; +} + +IntGaugePtr MetricsRecord::CreateIntGauge(const std::string& name) { + IntGaugePtr gaugePtr = std::make_shared(name); + mIntGauges.emplace_back(gaugePtr); + return gaugePtr; +} + +DoubleGaugePtr MetricsRecord::CreateDoubleGauge(const std::string& name) { 
+ DoubleGaugePtr gaugePtr = std::make_shared>(name); + mDoubleGauges.emplace_back(gaugePtr); + return gaugePtr; +} + +void MetricsRecord::MarkDeleted() { + mDeleted = true; +} + +bool MetricsRecord::IsDeleted() const { + return mDeleted; +} + +const std::string& MetricsRecord::GetCategory() const { + return mCategory; +} + +const MetricLabelsPtr& MetricsRecord::GetLabels() const { + return mLabels; +} + +const DynamicMetricLabelsPtr& MetricsRecord::GetDynamicLabels() const { + return mDynamicLabels; +} + +const std::vector& MetricsRecord::GetCounters() const { + return mCounters; +} + +const std::vector& MetricsRecord::GetTimeCounters() const { + return mTimeCounters; +} + +const std::vector& MetricsRecord::GetIntGauges() const { + return mIntGauges; +} + +const std::vector& MetricsRecord::GetDoubleGauges() const { + return mDoubleGauges; +} + +MetricsRecord* MetricsRecord::Collect() { + MetricsRecord* metrics = new MetricsRecord(mCategory, mLabels, mDynamicLabels); + for (auto& item : mCounters) { + CounterPtr newPtr(item->Collect()); + metrics->mCounters.emplace_back(newPtr); + } + for (auto& item : mTimeCounters) { + TimeCounterPtr newPtr(item->Collect()); + metrics->mTimeCounters.emplace_back(newPtr); + } + for (auto& item : mIntGauges) { + IntGaugePtr newPtr(item->Collect()); + metrics->mIntGauges.emplace_back(newPtr); + } + for (auto& item : mDoubleGauges) { + DoubleGaugePtr newPtr(item->Collect()); + metrics->mDoubleGauges.emplace_back(newPtr); + } + return metrics; +} + +MetricsRecord* MetricsRecord::GetNext() const { + return mNext; +} + +void MetricsRecord::SetNext(MetricsRecord* next) { + mNext = next; +} + +MetricsRecordRef::~MetricsRecordRef() { + if (mMetrics) { + mMetrics->MarkDeleted(); + } +} + +void MetricsRecordRef::SetMetricsRecord(MetricsRecord* metricRecord) { + mMetrics = metricRecord; +} + +const std::string& MetricsRecordRef::GetCategory() const { + return mMetrics->GetCategory(); +} + +const MetricLabelsPtr& MetricsRecordRef::GetLabels() 
const { + return mMetrics->GetLabels(); +} + +const DynamicMetricLabelsPtr& MetricsRecordRef::GetDynamicLabels() const { + return mMetrics->GetDynamicLabels(); +} + +CounterPtr MetricsRecordRef::CreateCounter(const std::string& name) { + return mMetrics->CreateCounter(name); +} + +TimeCounterPtr MetricsRecordRef::CreateTimeCounter(const std::string& name) { + return mMetrics->CreateTimeCounter(name); +} + +IntGaugePtr MetricsRecordRef::CreateIntGauge(const std::string& name) { + return mMetrics->CreateIntGauge(name); +} + +DoubleGaugePtr MetricsRecordRef::CreateDoubleGauge(const std::string& name) { + return mMetrics->CreateDoubleGauge(name); +} + +const MetricsRecord* MetricsRecordRef::operator->() const { + return mMetrics; +} + +void MetricsRecordRef::AddLabels(MetricLabels&& labels) { + mMetrics->GetLabels()->insert(mMetrics->GetLabels()->end(), labels.begin(), labels.end()); +} + +#ifdef APSARA_UNIT_TEST_MAIN +bool MetricsRecordRef::HasLabel(const std::string& key, const std::string& value) const { + for (auto item : *(mMetrics->GetLabels())) { + if (item.first == key && item.second == value) { + return true; + } + } + return false; +} +#endif + +} \ No newline at end of file diff --git a/core/monitor/LogtailMetric.h b/core/monitor/MetricRecord.h similarity index 59% rename from core/monitor/LogtailMetric.h rename to core/monitor/MetricRecord.h index 2065f2184f..487be8a65c 100644 --- a/core/monitor/LogtailMetric.h +++ b/core/monitor/MetricRecord.h @@ -1,5 +1,5 @@ /* - * Copyright 2023 iLogtail Authors + * Copyright 2024 iLogtail Authors * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -15,16 +15,7 @@ */ #pragma once - -#include -#include -#include -#include -#include - -#include "common/Lock.h" -#include "monitor/LoongCollectorMetricTypes.h" -#include "protobuf/sls/sls_logs.pb.h" +#include "MetricTypes.h" namespace logtail { @@ -123,84 +114,4 @@ inline bool operator!=(std::nullptr_t lhs, const MetricsRecordRef& rhs) { return !(lhs == rhs); } -class ReentrantMetricsRecord { -private: - MetricsRecordRef mMetricsRecordRef; - std::unordered_map mCounters; - std::unordered_map mTimeCounters; - std::unordered_map mIntGauges; - std::unordered_map mDoubleGauges; - -public: - void Init(const std::string& category, - MetricLabels& labels, - DynamicMetricLabels& dynamicLabels, - std::unordered_map& metricKeys); - const MetricLabelsPtr& GetLabels() const; - const DynamicMetricLabelsPtr& GetDynamicLabels() const; - CounterPtr GetCounter(const std::string& name); - TimeCounterPtr GetTimeCounter(const std::string& name); - IntGaugePtr GetIntGauge(const std::string& name); - DoubleGaugePtr GetDoubleGauge(const std::string& name); -}; -using ReentrantMetricsRecordRef = std::shared_ptr; - -class WriteMetrics { -private: - WriteMetrics() = default; - std::mutex mMutex; - MetricsRecord* mHead = nullptr; - - void Clear(); - MetricsRecord* GetHead(); - -public: - ~WriteMetrics(); - static WriteMetrics* GetInstance() { - static WriteMetrics* ptr = new WriteMetrics(); - return ptr; - } - - void PrepareMetricsRecordRef(MetricsRecordRef& ref, - const std::string& category, - MetricLabels&& labels, - DynamicMetricLabels&& dynamicLabels = {}); - void CreateMetricsRecordRef(MetricsRecordRef& ref, - const std::string& category, - MetricLabels&& labels, - DynamicMetricLabels&& dynamicLabels = {}); - void CommitMetricsRecordRef(MetricsRecordRef& ref); - MetricsRecord* DoSnapshot(); - - -#ifdef APSARA_UNIT_TEST_MAIN - friend class ILogtailMetricUnittest; -#endif -}; - -class ReadMetrics { -private: - ReadMetrics() = default; - mutable ReadWriteLock mReadWriteLock; - 
MetricsRecord* mHead = nullptr; - void Clear(); - MetricsRecord* GetHead(); - -public: - ~ReadMetrics(); - static ReadMetrics* GetInstance() { - static ReadMetrics* ptr = new ReadMetrics(); - return ptr; - } - void ReadAsLogGroup(const std::string& regionFieldName, - const std::string& defaultRegion, - std::map& logGroupMap) const; - void ReadAsFileBuffer(std::string& metricsContent) const; - void UpdateMetrics(); - -#ifdef APSARA_UNIT_TEST_MAIN - friend class ILogtailMetricUnittest; -#endif -}; - -} // namespace logtail +} \ No newline at end of file diff --git a/core/monitor/LoongCollectorMetricTypes.h b/core/monitor/MetricTypes.h similarity index 100% rename from core/monitor/LoongCollectorMetricTypes.h rename to core/monitor/MetricTypes.h diff --git a/core/monitor/Monitor.cpp b/core/monitor/Monitor.cpp index 3bb00943f1..e09ba3e0b0 100644 --- a/core/monitor/Monitor.cpp +++ b/core/monitor/Monitor.cpp @@ -36,7 +36,7 @@ #include "go_pipeline/LogtailPlugin.h" #include "logger/Logger.h" #include "monitor/LogFileProfiler.h" -#include "monitor/LogtailAlarm.h" +#include "monitor/AlarmManager.h" #include "monitor/MetricExportor.h" #include "plugin/flusher/sls/FlusherSLS.h" #include "protobuf/sls/sls_logs.pb.h" @@ -240,9 +240,9 @@ bool LogtailMonitor::SendStatusProfile(bool suicide) { if (lastReadEventTime > 0 && (now.tv_sec - lastReadEventTime > AppConfig::GetInstance()->GetForceQuitReadTimeout())) { LOG_ERROR(sLogger, ("last read event time is too old", lastReadEventTime)("prepare force exit", "")); - LogtailAlarm::GetInstance()->SendAlarm( + AlarmManager::GetInstance()->SendAlarm( LOGTAIL_CRASH_ALARM, "last read event time is too old: " + ToString(lastReadEventTime) + " force exit"); - LogtailAlarm::GetInstance()->ForceToSend(); + AlarmManager::GetInstance()->ForceToSend(); sleep(10); _exit(1); } diff --git a/core/monitor/Monitor.h b/core/monitor/Monitor.h index c6656a548e..4705236d32 100644 --- a/core/monitor/Monitor.h +++ b/core/monitor/Monitor.h @@ -21,7 +21,7 @@ 
#include #include -#include "LogtailMetric.h" +#include "MetricManager.h" #include "MetricConstants.h" #include "MetricStore.h" diff --git a/core/monitor/PluginMetricManager.cpp b/core/monitor/PluginMetricManager.cpp index f39506971e..25a6ff41a1 100644 --- a/core/monitor/PluginMetricManager.cpp +++ b/core/monitor/PluginMetricManager.cpp @@ -17,6 +17,73 @@ namespace logtail { +// ReentrantMetricsRecord operations can be lock-free, because mCounters and mGauges are only populated during initialization; afterwards only Get operations are allowed +void ReentrantMetricsRecord::Init(const std::string& category, + MetricLabels& labels, + DynamicMetricLabels& dynamicLabels, + std::unordered_map<std::string, MetricType>& metricKeys) { + WriteMetrics::GetInstance()->PrepareMetricsRecordRef( + mMetricsRecordRef, category, std::move(labels), std::move(dynamicLabels)); + for (auto metric : metricKeys) { + switch (metric.second) { + case MetricType::METRIC_TYPE_COUNTER: + mCounters[metric.first] = mMetricsRecordRef.CreateCounter(metric.first); + break; + case MetricType::METRIC_TYPE_TIME_COUNTER: + mTimeCounters[metric.first] = mMetricsRecordRef.CreateTimeCounter(metric.first); + break; + case MetricType::METRIC_TYPE_INT_GAUGE: + mIntGauges[metric.first] = mMetricsRecordRef.CreateIntGauge(metric.first); + break; + case MetricType::METRIC_TYPE_DOUBLE_GAUGE: + mDoubleGauges[metric.first] = mMetricsRecordRef.CreateDoubleGauge(metric.first); + break; + default: + break; + } + } +} + +const MetricLabelsPtr& ReentrantMetricsRecord::GetLabels() const { + return mMetricsRecordRef->GetLabels(); +} + +const DynamicMetricLabelsPtr& ReentrantMetricsRecord::GetDynamicLabels() const { + return mMetricsRecordRef->GetDynamicLabels(); +} + +CounterPtr ReentrantMetricsRecord::GetCounter(const std::string& name) { + auto it = mCounters.find(name); + if (it != mCounters.end()) { + return it->second; + } + return nullptr; +} + +TimeCounterPtr ReentrantMetricsRecord::GetTimeCounter(const std::string& name) { + auto it = mTimeCounters.find(name); + if (it != mTimeCounters.end()) { + return it->second; + } + return nullptr; +}
+ +IntGaugePtr ReentrantMetricsRecord::GetIntGauge(const std::string& name) { + auto it = mIntGauges.find(name); + if (it != mIntGauges.end()) { + return it->second; + } + return nullptr; +} + +DoubleGaugePtr ReentrantMetricsRecord::GetDoubleGauge(const std::string& name) { + auto it = mDoubleGauges.find(name); + if (it != mDoubleGauges.end()) { + return it->second; + } + return nullptr; +} + ReentrantMetricsRecordRef PluginMetricManager::GetOrCreateReentrantMetricsRecordRef(MetricLabels labels, DynamicMetricLabels dynamicLabels) { std::lock_guard lock(mutex); diff --git a/core/monitor/PluginMetricManager.h b/core/monitor/PluginMetricManager.h index dfa0a23ecb..1c12dce41f 100644 --- a/core/monitor/PluginMetricManager.h +++ b/core/monitor/PluginMetricManager.h @@ -17,10 +17,33 @@ #include #include -#include "LogtailMetric.h" +#include "MetricManager.h" namespace logtail { +class ReentrantMetricsRecord { +private: + MetricsRecordRef mMetricsRecordRef; + std::unordered_map mCounters; + std::unordered_map mTimeCounters; + std::unordered_map mIntGauges; + std::unordered_map mDoubleGauges; + +public: + void Init(const std::string& category, + MetricLabels& labels, + DynamicMetricLabels& dynamicLabels, + std::unordered_map& metricKeys); + const MetricLabelsPtr& GetLabels() const; + const DynamicMetricLabelsPtr& GetDynamicLabels() const; + CounterPtr GetCounter(const std::string& name); + TimeCounterPtr GetTimeCounter(const std::string& name); + IntGaugePtr GetIntGauge(const std::string& name); + DoubleGaugePtr GetDoubleGauge(const std::string& name); +}; +using ReentrantMetricsRecordRef = std::shared_ptr; + + class PluginMetricManager { public: PluginMetricManager(const MetricLabelsPtr defaultLabels, diff --git a/core/profile_sender/ProfileSender.cpp b/core/monitor/profile_sender/ProfileSender.cpp similarity index 98% rename from core/profile_sender/ProfileSender.cpp rename to core/monitor/profile_sender/ProfileSender.cpp index 7098f31ce1..f89667496c 100644 --- 
a/core/profile_sender/ProfileSender.cpp +++ b/core/monitor/profile_sender/ProfileSender.cpp @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "profile_sender/ProfileSender.h" +#include "ProfileSender.h" #include @@ -23,7 +23,7 @@ #include "common/LogtailCommonFlags.h" #include "logger/Logger.h" #ifdef __ENTERPRISE__ -#include "profile_sender/EnterpriseProfileSender.h" +#include "EnterpriseProfileSender.h" #endif #include "sdk/Exception.h" #include "sls_control/SLSControl.h" diff --git a/core/profile_sender/ProfileSender.h b/core/monitor/profile_sender/ProfileSender.h similarity index 100% rename from core/profile_sender/ProfileSender.h rename to core/monitor/profile_sender/ProfileSender.h diff --git a/core/pipeline/Pipeline.cpp b/core/pipeline/Pipeline.cpp index d80ebc5064..af9bfb4ac0 100644 --- a/core/pipeline/Pipeline.cpp +++ b/core/pipeline/Pipeline.cpp @@ -500,7 +500,7 @@ bool Pipeline::LoadGoPipelines() const { LOG_ERROR(mContext.GetLogger(), ("failed to init pipeline", "Go pipeline is invalid, see go_plugin.LOG for detail")( "Go pipeline num", "2")("Go pipeline content", content)("config", mName)); - LogtailAlarm::GetInstance()->SendAlarm(CATEGORY_CONFIG_ALARM, + AlarmManager::GetInstance()->SendAlarm(CATEGORY_CONFIG_ALARM, "Go pipeline is invalid, content: " + content + ", config: " + mName, mContext.GetProjectName(), mContext.GetLogstoreName(), @@ -519,7 +519,7 @@ bool Pipeline::LoadGoPipelines() const { LOG_ERROR(mContext.GetLogger(), ("failed to init pipeline", "Go pipeline is invalid, see go_plugin.LOG for detail")( "Go pipeline num", "1")("Go pipeline content", content)("config", mName)); - LogtailAlarm::GetInstance()->SendAlarm(CATEGORY_CONFIG_ALARM, + AlarmManager::GetInstance()->SendAlarm(CATEGORY_CONFIG_ALARM, "Go pipeline is invalid, content: " + content + ", config: " + mName, mContext.GetProjectName(), mContext.GetLogstoreName(), @@ -550,7 +550,7 @@ void 
Pipeline::WaitAllItemsInProcessFinished() { uint64_t duration = GetCurrentTimeInMilliSeconds() - startTime; if (!alarmOnce && duration > 10000) { // 10s LOG_ERROR(sLogger, ("pipeline stop", "too slow")("config", mName)("cost", duration)); - LogtailAlarm::GetInstance()->SendAlarm(CONFIG_UPDATE_ALARM, + AlarmManager::GetInstance()->SendAlarm(CONFIG_UPDATE_ALARM, string("pipeline stop too slow, config: ") + mName + "; cost:" + std::to_string(duration), mContext.GetProjectName(), diff --git a/core/pipeline/Pipeline.h b/core/pipeline/Pipeline.h index 9e4b0dd1be..d6a55911c7 100644 --- a/core/pipeline/Pipeline.h +++ b/core/pipeline/Pipeline.h @@ -25,7 +25,7 @@ #include "config/PipelineConfig.h" #include "models/PipelineEventGroup.h" -#include "monitor/LogtailMetric.h" +#include "monitor/MetricManager.h" #include "pipeline/PipelineContext.h" #include "pipeline/plugin/instance/FlusherInstance.h" #include "pipeline/plugin/instance/InputInstance.h" diff --git a/core/pipeline/PipelineContext.h b/core/pipeline/PipelineContext.h index 8eaa1ce487..e7aa2085ac 100644 --- a/core/pipeline/PipelineContext.h +++ b/core/pipeline/PipelineContext.h @@ -24,7 +24,7 @@ #include "logger/Logger.h" #include "models/PipelineEventGroup.h" #include "monitor/LogFileProfiler.h" -#include "monitor/LogtailAlarm.h" +#include "monitor/AlarmManager.h" #include "pipeline/GlobalConfig.h" #include "pipeline/queue/QueueKey.h" @@ -88,7 +88,7 @@ class PipelineContext { ProcessProfile& GetProcessProfile() const { return mProcessProfile; } // LogFileProfiler& GetProfiler() { return *mProfiler; } const Logger::logger& GetLogger() const { return mLogger; } - LogtailAlarm& GetAlarm() const { return *mAlarm; }; + AlarmManager& GetAlarm() const { return *mAlarm; }; private: static const std::string sEmptyString; @@ -109,7 +109,7 @@ class PipelineContext { mutable ProcessProfile mProcessProfile; // LogFileProfiler* mProfiler = LogFileProfiler::GetInstance(); Logger::logger mLogger = sLogger; - LogtailAlarm* mAlarm = 
LogtailAlarm::GetInstance(); + AlarmManager* mAlarm = AlarmManager::GetInstance(); }; } // namespace logtail diff --git a/core/pipeline/PipelineManager.cpp b/core/pipeline/PipelineManager.cpp index da33c8bd20..1fe4709564 100644 --- a/core/pipeline/PipelineManager.cpp +++ b/core/pipeline/PipelineManager.cpp @@ -86,7 +86,7 @@ void logtail::PipelineManager::UpdatePipelines(PipelineConfigDiff& diff) { LOG_WARNING(sLogger, ("failed to build pipeline for existing config", "keep current pipeline running")("config", config.mName)); - LogtailAlarm::GetInstance()->SendAlarm( + AlarmManager::GetInstance()->SendAlarm( CATEGORY_CONFIG_ALARM, "failed to build pipeline for existing config: keep current pipeline running, config: " + config.mName, config.mProject, @@ -113,7 +113,7 @@ void logtail::PipelineManager::UpdatePipelines(PipelineConfigDiff& diff) { if (!p) { LOG_WARNING(sLogger, ("failed to build pipeline for new config", "skip current object")("config", config.mName)); - LogtailAlarm::GetInstance()->SendAlarm( + AlarmManager::GetInstance()->SendAlarm( CATEGORY_CONFIG_ALARM, "failed to build pipeline for new config: skip current object, config: " + config.mName, config.mProject, diff --git a/core/pipeline/batch/Batcher.h b/core/pipeline/batch/Batcher.h index 2b1898fd55..0d47087ffe 100644 --- a/core/pipeline/batch/Batcher.h +++ b/core/pipeline/batch/Batcher.h @@ -27,7 +27,7 @@ #include "common/Flags.h" #include "common/ParamExtractor.h" #include "models/PipelineEventGroup.h" -#include "monitor/LogtailMetric.h" +#include "monitor/MetricManager.h" #include "monitor/metric_constants/MetricConstants.h" #include "pipeline/PipelineContext.h" #include "pipeline/batch/BatchItem.h" diff --git a/core/pipeline/plugin/instance/ProcessorInstance.h b/core/pipeline/plugin/instance/ProcessorInstance.h index 6cd7f46d95..b7b5460c1c 100644 --- a/core/pipeline/plugin/instance/ProcessorInstance.h +++ b/core/pipeline/plugin/instance/ProcessorInstance.h @@ -21,7 +21,7 @@ #include #include 
"models/PipelineEventGroup.h" -#include "monitor/LogtailMetric.h" +#include "monitor/MetricManager.h" #include "pipeline/PipelineContext.h" #include "pipeline/plugin/instance/PluginInstance.h" #include "pipeline/plugin/interface/Processor.h" diff --git a/core/pipeline/plugin/interface/Flusher.cpp b/core/pipeline/plugin/interface/Flusher.cpp index 5ac469ebd4..f5c2453f4c 100644 --- a/core/pipeline/plugin/interface/Flusher.cpp +++ b/core/pipeline/plugin/interface/Flusher.cpp @@ -63,7 +63,7 @@ bool Flusher::PushToQueue(unique_ptr&& item, uint32_t retryTime LOG_ERROR(sLogger, ("failed to push data to sender queue", "queue not found")("action", "discard data")("config-flusher-dst", str)); - LogtailAlarm::GetInstance()->SendAlarm( + AlarmManager::GetInstance()->SendAlarm( DISCARD_DATA_ALARM, "failed to push data to sender queue: queue not found\taction: discard data\tconfig-flusher-dst" + str); return false; @@ -78,7 +78,7 @@ bool Flusher::PushToQueue(unique_ptr&& item, uint32_t retryTime LOG_WARNING( sLogger, ("failed to push data to sender queue", "queue full")("action", "discard data")("config-flusher-dst", str)); - LogtailAlarm::GetInstance()->SendAlarm( + AlarmManager::GetInstance()->SendAlarm( DISCARD_DATA_ALARM, "failed to push data to sender queue: queue full\taction: discard data\tconfig-flusher-dst" + str); return false; diff --git a/core/pipeline/plugin/interface/Plugin.h b/core/pipeline/plugin/interface/Plugin.h index 1dbd872756..999b10b921 100644 --- a/core/pipeline/plugin/interface/Plugin.h +++ b/core/pipeline/plugin/interface/Plugin.h @@ -20,7 +20,7 @@ #include #include -#include "monitor/LogtailMetric.h" +#include "monitor/MetricManager.h" #include "monitor/metric_constants/MetricConstants.h" #include "pipeline/PipelineContext.h" diff --git a/core/pipeline/queue/QueueInterface.h b/core/pipeline/queue/QueueInterface.h index b0ea6d0e47..2793d36d9e 100644 --- a/core/pipeline/queue/QueueInterface.h +++ b/core/pipeline/queue/QueueInterface.h @@ -16,7 +16,7 @@ 
#pragma once -#include "monitor/LogtailMetric.h" +#include "monitor/MetricManager.h" #include "monitor/metric_constants/MetricConstants.h" #include "pipeline/PipelineContext.h" #include "pipeline/queue/QueueKey.h" diff --git a/core/pipeline/route/Router.h b/core/pipeline/route/Router.h index 3a879ae66a..b4356c05d1 100644 --- a/core/pipeline/route/Router.h +++ b/core/pipeline/route/Router.h @@ -22,7 +22,7 @@ #include #include "models/PipelineEventGroup.h" -#include "monitor/LogtailMetric.h" +#include "monitor/MetricManager.h" #include "pipeline/route/Condition.h" namespace logtail { diff --git a/core/plugin/flusher/sls/DiskBufferWriter.cpp b/core/plugin/flusher/sls/DiskBufferWriter.cpp index 6be224f86a..2285658a1d 100644 --- a/core/plugin/flusher/sls/DiskBufferWriter.cpp +++ b/core/plugin/flusher/sls/DiskBufferWriter.cpp @@ -23,7 +23,7 @@ #include "common/RuntimeUtil.h" #include "common/StringTools.h" #include "logger/Logger.h" -#include "monitor/LogtailAlarm.h" +#include "monitor/AlarmManager.h" #include "pipeline/limiter/RateLimiter.h" #include "pipeline/queue/QueueKeyManager.h" #include "pipeline/queue/SLSSenderQueueItem.h" @@ -106,7 +106,7 @@ bool DiskBufferWriter::PushToDiskBuffer(SenderQueueItem* item, uint32_t retryTim LOG_WARNING(sLogger, ("failed to add sender queue item to disk buffer writer", "queue is full")("action", "discard data")( "config-flusher-dst", QueueKeyManager::GetInstance()->GetName(item->mFlusher->GetQueueKey()))); - LogtailAlarm::GetInstance()->SendAlarm( + AlarmManager::GetInstance()->SendAlarm( DISCARD_DATA_ALARM, "failed to add sender queue item to disk buffer writer: queue is full\taction: discard data", flusher->mProject, @@ -188,13 +188,13 @@ void DiskBufferWriter::BufferSenderThread() { LOG_ERROR(sLogger, ("invalid key_version in header", kvMap[STRING_FLAG(file_encryption_field_key_version)])("delete bufffer file", fileName)); - LogtailAlarm::GetInstance()->SendAlarm( + AlarmManager::GetInstance()->SendAlarm( 
DISCARD_SECONDARY_ALARM, "key version in buffer file invalid, delete file: " + fileName); } } else { remove(fileName.c_str()); LOG_WARNING(sLogger, ("check header of buffer file failed, delete file", fileName)); - LogtailAlarm::GetInstance()->SendAlarm(DISCARD_SECONDARY_ALARM, + AlarmManager::GetInstance()->SendAlarm(DISCARD_SECONDARY_ALARM, "check header of buffer file failed, delete file: " + fileName); } } @@ -240,14 +240,14 @@ bool DiskBufferWriter::LoadFileToSend(time_t timeLine, std::vector& LOG_WARNING(sLogger, ("buffer file path not exist", bufferFilePath)("logtail will not recreate external path", "local secondary does not work")); - LogtailAlarm::GetInstance()->SendAlarm(SECONDARY_READ_WRITE_ALARM, + AlarmManager::GetInstance()->SendAlarm(SECONDARY_READ_WRITE_ALARM, string("buffer file directory:") + bufferFilePath + " not exist"); return false; } string errorMessage; if (!RebuildExecutionDir(AppConfig::GetInstance()->GetIlogtailConfigJson(), errorMessage)) { LOG_ERROR(sLogger, ("failed to rebuild buffer file path", bufferFilePath)("errorMessage", errorMessage)); - LogtailAlarm::GetInstance()->SendAlarm(SECONDARY_READ_WRITE_ALARM, errorMessage); + AlarmManager::GetInstance()->SendAlarm(SECONDARY_READ_WRITE_ALARM, errorMessage); return false; } else LOG_INFO(sLogger, ("rebuild buffer file path success", bufferFilePath)); @@ -257,7 +257,7 @@ bool DiskBufferWriter::LoadFileToSend(time_t timeLine, std::vector& if (!dir.Open()) { string errorStr = ErrnoToString(GetErrno()); LOG_ERROR(sLogger, ("open dir error", bufferFilePath)("reason", errorStr)); - LogtailAlarm::GetInstance()->SendAlarm(SECONDARY_READ_WRITE_ALARM, + AlarmManager::GetInstance()->SendAlarm(SECONDARY_READ_WRITE_ALARM, string("open dir error,dir:") + bufferFilePath + ",error:" + errorStr); return false; } @@ -297,7 +297,7 @@ bool DiskBufferWriter::ReadNextEncryption(int32_t& pos, break; if (retryTimes >= 3) { string errorStr = ErrnoToString(GetErrno()); - 
LogtailAlarm::GetInstance()->SendAlarm(SECONDARY_READ_WRITE_ALARM, + AlarmManager::GetInstance()->SendAlarm(SECONDARY_READ_WRITE_ALARM, string("open file error:") + filename + ",error:" + errorStr); LOG_ERROR(sLogger, ("open file error", filename)("error", errorStr)); return false; @@ -314,7 +314,7 @@ bool DiskBufferWriter::ReadNextEncryption(int32_t& pos, auto nbytes = fread(static_cast(&meta), sizeof(char), sizeof(meta), fin); if (nbytes != sizeof(meta)) { string errorStr = ErrnoToString(GetErrno()); - LogtailAlarm::GetInstance()->SendAlarm(SECONDARY_READ_WRITE_ALARM, + AlarmManager::GetInstance()->SendAlarm(SECONDARY_READ_WRITE_ALARM, string("read encryption file meta error:") + filename + ", error:" + errorStr + ", meta.mEncryptionSize:" + ToString(meta.mEncryptionSize) + ", nbytes: " + ToString(nbytes) @@ -334,7 +334,7 @@ bool DiskBufferWriter::ReadNextEncryption(int32_t& pos, } if (meta.mEncryptionSize < 0 || encodedInfoSize < 0) { - LogtailAlarm::GetInstance()->SendAlarm(SECONDARY_READ_WRITE_ALARM, + AlarmManager::GetInstance()->SendAlarm(SECONDARY_READ_WRITE_ALARM, string("meta of encryption file invalid:" + filename + ", meta.mEncryptionSize:" + ToString(meta.mEncryptionSize) + ", meta.mEncodedInfoSize:" + ToString(meta.mEncodedInfoSize))); @@ -350,7 +350,7 @@ bool DiskBufferWriter::ReadNextEncryption(int32_t& pos, fclose(fin); if (meta.mHandled != 1) { LOG_WARNING(sLogger, ("timeout buffer file, meta.mTimeStamp", meta.mTimeStamp)); - LogtailAlarm::GetInstance()->SendAlarm(DISCARD_SECONDARY_ALARM, + AlarmManager::GetInstance()->SendAlarm(DISCARD_SECONDARY_ALARM, "buffer file timeout (1day), delete file: " + filename); } return true; @@ -361,7 +361,7 @@ bool DiskBufferWriter::ReadNextEncryption(int32_t& pos, if (nbytes != static_cast(encodedInfoSize)) { fclose(fin); string errorStr = ErrnoToString(GetErrno()); - LogtailAlarm::GetInstance()->SendAlarm(SECONDARY_READ_WRITE_ALARM, + AlarmManager::GetInstance()->SendAlarm(SECONDARY_READ_WRITE_ALARM, 
string("read projectname from file error:") + filename + ", error:" + errorStr + ", meta.mEncodedInfoSize:" + ToString(meta.mEncodedInfoSize) + ", nbytes:" + ToString(nbytes)); @@ -376,7 +376,7 @@ bool DiskBufferWriter::ReadNextEncryption(int32_t& pos, if (pbMeta) { if (!bufferMeta.ParseFromString(encodedInfo)) { fclose(fin); - LogtailAlarm::GetInstance()->SendAlarm(SECONDARY_READ_WRITE_ALARM, + AlarmManager::GetInstance()->SendAlarm(SECONDARY_READ_WRITE_ALARM, string("parse buffer meta from file error:") + filename); LOG_ERROR(sLogger, ("parse buffer meta from file error", filename)("buffer meta", encodedInfo)); bufferMeta.Clear(); @@ -399,7 +399,7 @@ bool DiskBufferWriter::ReadNextEncryption(int32_t& pos, if (nbytes != static_cast(meta.mEncryptionSize)) { fclose(fin); string errorStr = ErrnoToString(GetErrno()); - LogtailAlarm::GetInstance()->SendAlarm(SECONDARY_READ_WRITE_ALARM, + AlarmManager::GetInstance()->SendAlarm(SECONDARY_READ_WRITE_ALARM, string("read encryption from file error:") + filename + ",error:" + errorStr + ",meta.mEncryptionSize:" + ToString(meta.mEncryptionSize) + ", nbytes:" + ToString(nbytes)); @@ -443,7 +443,7 @@ void DiskBufferWriter::SendEncryptionBuffer(const std::string& filename, int32_t LOG_ERROR(sLogger, ("decrypt error, project_name", bufferMeta.project())("key_version", keyVersion)("meta.mLogDataSize", meta.mLogDataSize)); - LogtailAlarm::GetInstance()->SendAlarm(ENCRYPT_DECRYPT_FAIL_ALARM, + AlarmManager::GetInstance()->SendAlarm(ENCRYPT_DECRYPT_FAIL_ALARM, string("decrypt error, project_name:" + bufferMeta.project() + ", key_version:" + ToString(keyVersion) + ", meta.mLogDataSize:" + ToString(meta.mLogDataSize))); @@ -459,14 +459,14 @@ void DiskBufferWriter::SendEncryptionBuffer(const std::string& filename, int32_t LOG_ERROR(sLogger, ("parse error from string to loggroup, projectName is", bufferMeta.project())); discardCount++; - LogtailAlarm::GetInstance()->SendAlarm( + AlarmManager::GetInstance()->SendAlarm( 
LOG_GROUP_PARSE_FAIL_ALARM, string("projectName is:" + bufferMeta.project() + ", fileName is:" + filename)); } else if (!CompressLz4(logGroupStr, logData)) { sendResult = true; LOG_ERROR(sLogger, ("LZ4 compress loggroup fail, projectName is", bufferMeta.project())); discardCount++; - LogtailAlarm::GetInstance()->SendAlarm( + AlarmManager::GetInstance()->SendAlarm( SEND_COMPRESS_FAIL_ALARM, string("projectName is:" + bufferMeta.project() + ", fileName is:" + filename)); } else { @@ -483,7 +483,7 @@ void DiskBufferWriter::SendEncryptionBuffer(const std::string& filename, int32_t if (res == SEND_OK) sendResult = true; else if (res == SEND_DISCARD_ERROR || res == SEND_UNAUTHORIZED) { - LogtailAlarm::GetInstance()->SendAlarm(SEND_DATA_FAIL_ALARM, + AlarmManager::GetInstance()->SendAlarm(SEND_DATA_FAIL_ALARM, string("send buffer file fail, rawsize:") + ToString(bufferMeta.rawsize()) + "errorCode: " + errorCode, @@ -523,7 +523,7 @@ void DiskBufferWriter::SendEncryptionBuffer(const std::string& filename, int32_t remove(filename.c_str()); if (discardCount > 0) { LOG_ERROR(sLogger, ("send buffer file, discard LogGroup count", discardCount)("delete file", filename)); - LogtailAlarm::GetInstance()->SendAlarm(DISCARD_SECONDARY_ALARM, + AlarmManager::GetInstance()->SendAlarm(DISCARD_SECONDARY_ALARM, "delete buffer file: " + filename + ", discard " + ToString(discardCount) + " logGroups"); } else @@ -546,7 +546,7 @@ bool DiskBufferWriter::CreateNewFile() { ("buffer file count exceed limit", "file created earlier will be cleaned, and new file will create for new log data")("delete file", fileName)); - LogtailAlarm::GetInstance()->SendAlarm(DISCARD_SECONDARY_ALARM, + AlarmManager::GetInstance()->SendAlarm(DISCARD_SECONDARY_ALARM, "buffer file count exceed, delete file: " + fileName); } } @@ -562,7 +562,7 @@ bool DiskBufferWriter::WriteBackMeta(int32_t pos, const void* buf, int32_t lengt int fd = open(filename.c_str(), O_WRONLY); if (fd < 0) { string errorStr = 
ErrnoToString(GetErrno()); - LogtailAlarm::GetInstance()->SendAlarm(SECONDARY_READ_WRITE_ALARM, + AlarmManager::GetInstance()->SendAlarm(SECONDARY_READ_WRITE_ALARM, string("open secondary file for write meta fail:") + filename + ",reason:" + errorStr); LOG_ERROR(sLogger, ("open file error", filename)); @@ -571,7 +571,7 @@ bool DiskBufferWriter::WriteBackMeta(int32_t pos, const void* buf, int32_t lengt lseek(fd, pos, SEEK_SET); if (write(fd, buf, length) < 0) { string errorStr = ErrnoToString(GetErrno()); - LogtailAlarm::GetInstance()->SendAlarm(SECONDARY_READ_WRITE_ALARM, + AlarmManager::GetInstance()->SendAlarm(SECONDARY_READ_WRITE_ALARM, string("write secondary file for write meta fail:") + filename + ",reason:" + errorStr); LOG_ERROR(sLogger, ("can not write back meta", filename)); @@ -582,7 +582,7 @@ bool DiskBufferWriter::WriteBackMeta(int32_t pos, const void* buf, int32_t lengt FILE* f = FileWriteOnlyOpen(filename.c_str(), "wb"); if (!f) { string errorStr = ErrnoToString(GetErrno()); - LogtailAlarm::GetInstance()->SendAlarm(SECONDARY_READ_WRITE_ALARM, + AlarmManager::GetInstance()->SendAlarm(SECONDARY_READ_WRITE_ALARM, string("open secondary file for write meta fail:") + filename + ",reason:" + errorStr); LOG_ERROR(sLogger, ("open file error", filename)); @@ -592,7 +592,7 @@ bool DiskBufferWriter::WriteBackMeta(int32_t pos, const void* buf, int32_t lengt auto nbytes = fwrite(buf, 1, length, f); if (nbytes != length) { string errorStr = ErrnoToString(GetErrno()); - LogtailAlarm::GetInstance()->SendAlarm(SECONDARY_READ_WRITE_ALARM, + AlarmManager::GetInstance()->SendAlarm(SECONDARY_READ_WRITE_ALARM, string("write secondary file for write meta fail:") + filename + ",reason:" + errorStr); LOG_ERROR(sLogger, ("can not write back meta", filename)); @@ -623,7 +623,7 @@ bool DiskBufferWriter::SendToBufferFile(SenderQueueItem* dataPtr) { FILE* fout = FileAppendOpen(bufferFileName.c_str(), "ab"); if (!fout) { string errorStr = ErrnoToString(GetErrno()); - 
LogtailAlarm::GetInstance()->SendAlarm(SECONDARY_READ_WRITE_ALARM, + AlarmManager::GetInstance()->SendAlarm(SECONDARY_READ_WRITE_ALARM, string("open file error:") + bufferFileName + ",error:" + errorStr); LOG_ERROR(sLogger, ("open buffer file error", bufferFileName)); return false; @@ -634,7 +634,7 @@ bool DiskBufferWriter::SendToBufferFile(SenderQueueItem* dataPtr) { auto nbytes = fwrite(header.c_str(), 1, header.size(), fout); if (header.size() != nbytes) { string errorStr = ErrnoToString(GetErrno()); - LogtailAlarm::GetInstance()->SendAlarm(SECONDARY_READ_WRITE_ALARM, + AlarmManager::GetInstance()->SendAlarm(SECONDARY_READ_WRITE_ALARM, string("write file error:") + bufferFileName + ", error:" + errorStr + ", nbytes:" + ToString(nbytes)); LOG_ERROR(sLogger, ("error write encryption header", bufferFileName)("error", errorStr)("nbytes", nbytes)); @@ -648,7 +648,7 @@ bool DiskBufferWriter::SendToBufferFile(SenderQueueItem* dataPtr) { if (!FileEncryption::GetInstance()->Encrypt(data->mData.c_str(), data->mData.size(), des, desLength)) { fclose(fout); LOG_ERROR(sLogger, ("encrypt error, project_name", flusher->mProject)); - LogtailAlarm::GetInstance()->SendAlarm(ENCRYPT_DECRYPT_FAIL_ALARM, + AlarmManager::GetInstance()->SendAlarm(ENCRYPT_DECRYPT_FAIL_ALARM, string("encrypt error, project_name:" + flusher->mProject)); return false; } @@ -683,7 +683,7 @@ bool DiskBufferWriter::SendToBufferFile(SenderQueueItem* dataPtr) { auto nbytes = fwrite(buffer, 1, bytesToWrite, fout); if (nbytes != bytesToWrite) { string errorStr = ErrnoToString(GetErrno()); - LogtailAlarm::GetInstance()->SendAlarm(SECONDARY_READ_WRITE_ALARM, + AlarmManager::GetInstance()->SendAlarm(SECONDARY_READ_WRITE_ALARM, string("write file error:") + bufferFileName + ", error:" + errorStr + ", nbytes:" + ToString(nbytes)); LOG_ERROR( @@ -778,7 +778,7 @@ SendResult DiskBufferWriter::SendToNetSync(sdk::Client* sendClient, if (sendRes == SEND_DISCARD_ERROR || sendRes == SEND_UNAUTHORIZED || sendRes == 
SEND_QUOTA_EXCEED || retryTimes >= INT32_FLAG(send_retrytimes)) { if (sendRes == SEND_QUOTA_EXCEED) - LogtailAlarm::GetInstance()->SendAlarm(SEND_QUOTA_EXCEED_ALARM, + AlarmManager::GetInstance()->SendAlarm(SEND_QUOTA_EXCEED_ALARM, "error_code: " + errorCode + ", error_message: " + ex.GetMessage(), bufferMeta.project(), diff --git a/core/plugin/flusher/sls/FlusherSLS.cpp b/core/plugin/flusher/sls/FlusherSLS.cpp index b6cf23fdf4..7a518bd734 100644 --- a/core/plugin/flusher/sls/FlusherSLS.cpp +++ b/core/plugin/flusher/sls/FlusherSLS.cpp @@ -759,7 +759,7 @@ void FlusherSLS::OnSendDone(const HttpResponse& response, SenderQueueItem* item) mProjectQuotaErrorCnt->Add(1); } } - LogtailAlarm::GetInstance()->SendAlarm(SEND_QUOTA_EXCEED_ALARM, + AlarmManager::GetInstance()->SendAlarm(SEND_QUOTA_EXCEED_ALARM, "error_code: " + slsResponse.mErrorCode + ", error_message: " + slsResponse.mErrorMsg + ", request_id:" + slsResponse.mRequestId, @@ -811,7 +811,7 @@ void FlusherSLS::OnSendDone(const HttpResponse& response, SenderQueueItem* item) if (!cpt) { failDetail << ", unexpected result when exactly once checkpoint is not found"; suggestion << "report bug"; - LogtailAlarm::GetInstance()->SendAlarm( + AlarmManager::GetInstance()->SendAlarm( EXACTLY_ONCE_ALARM, "drop exactly once log group because of invalid sequence ID, request id:" + slsResponse.mRequestId, @@ -829,7 +829,7 @@ void FlusherSLS::OnSendDone(const HttpResponse& response, SenderQueueItem* item) failDetail << ", drop exactly once log group and commit checkpoint" << " checkpointKey:" << cpt->key << " checkpoint:" << cpt->data.DebugString(); suggestion << "no suggestion"; - LogtailAlarm::GetInstance()->SendAlarm( + AlarmManager::GetInstance()->SendAlarm( EXACTLY_ONCE_ALARM, "drop exactly once log group because of invalid sequence ID, cpt:" + cpt->key + ", data:" + cpt->data.DebugString() + "request id:" + slsResponse.mRequestId, @@ -897,7 +897,7 @@ void FlusherSLS::OnSendDone(const HttpResponse& response, SenderQueueItem* 
item) default: LOG_WARNING(sLogger, LOG_PATTERN); if (!isProfileData) { - LogtailAlarm::GetInstance()->SendAlarm( + AlarmManager::GetInstance()->SendAlarm( SEND_DATA_FAIL_ALARM, "failed to send request: " + failDetail.str() + "\toperation: " + GetOperationString(operation) + "\trequestId: " + slsResponse.mRequestId @@ -1131,7 +1131,7 @@ bool FlusherSLS::PushToQueue(QueueKey key, unique_ptr&& item, u LOG_ERROR(sLogger, ("failed to push data to sender queue", "queue not found")("action", "discard data")("config-flusher-dst", str)); - LogtailAlarm::GetInstance()->SendAlarm( + AlarmManager::GetInstance()->SendAlarm( DISCARD_DATA_ALARM, "failed to push data to sender queue: queue not found\taction: discard data\tconfig-flusher-dst" + str); return false; @@ -1146,7 +1146,7 @@ bool FlusherSLS::PushToQueue(QueueKey key, unique_ptr&& item, u LOG_WARNING( sLogger, ("failed to push data to sender queue", "queue full")("action", "discard data")("config-flusher-dst", str)); - LogtailAlarm::GetInstance()->SendAlarm( + AlarmManager::GetInstance()->SendAlarm( DISCARD_DATA_ALARM, "failed to push data to sender queue: queue full\taction: discard data\tconfig-flusher-dst" + str); return false; diff --git a/core/plugin/processor/ProcessorParseDelimiterNative.cpp b/core/plugin/processor/ProcessorParseDelimiterNative.cpp index 4f9b44fdf8..ff674cace5 100644 --- a/core/plugin/processor/ProcessorParseDelimiterNative.cpp +++ b/core/plugin/processor/ProcessorParseDelimiterNative.cpp @@ -301,7 +301,7 @@ bool ProcessorParseDelimiterNative::ProcessEvent(const StringView& logPath, Pipe parseSuccess = false; } } else { - LogtailAlarm::GetInstance()->SendAlarm(PARSE_LOG_FAIL_ALARM, + AlarmManager::GetInstance()->SendAlarm(PARSE_LOG_FAIL_ALARM, std::string("parse delimiter log fail") + ", logs:" + buffer.to_string(), GetContext().GetProjectName(), @@ -311,7 +311,7 @@ bool ProcessorParseDelimiterNative::ProcessEvent(const StringView& logPath, Pipe parseSuccess = false; } } else { - 
LogtailAlarm::GetInstance()->SendAlarm(PARSE_LOG_FAIL_ALARM, + AlarmManager::GetInstance()->SendAlarm(PARSE_LOG_FAIL_ALARM, "no column keys defined", GetContext().GetProjectName(), GetContext().GetLogstoreName(), diff --git a/core/plugin/processor/ProcessorParseJsonNative.cpp b/core/plugin/processor/ProcessorParseJsonNative.cpp index 90b3d075c5..e7f65a998e 100644 --- a/core/plugin/processor/ProcessorParseJsonNative.cpp +++ b/core/plugin/processor/ProcessorParseJsonNative.cpp @@ -124,12 +124,12 @@ bool ProcessorParseJsonNative::JsonLogLineParser(LogEvent& sourceEvent, rapidjson::Document doc; doc.Parse(buffer.data(), buffer.size()); if (doc.HasParseError()) { - if (LogtailAlarm::GetInstance()->IsLowLevelAlarmValid()) { + if (AlarmManager::GetInstance()->IsLowLevelAlarmValid()) { LOG_WARNING(sLogger, ("parse json log fail, log", buffer)("rapidjson offset", doc.GetErrorOffset())( "rapidjson error", doc.GetParseError())("project", GetContext().GetProjectName())( "logstore", GetContext().GetLogstoreName())("file", logPath)); - LogtailAlarm::GetInstance()->SendAlarm(PARSE_LOG_FAIL_ALARM, + AlarmManager::GetInstance()->SendAlarm(PARSE_LOG_FAIL_ALARM, std::string("parse json fail:") + buffer.to_string(), GetContext().GetProjectName(), GetContext().GetLogstoreName(), @@ -139,11 +139,11 @@ bool ProcessorParseJsonNative::JsonLogLineParser(LogEvent& sourceEvent, mOutFailedEventsTotal->Add(1); parseSuccess = false; } else if (!doc.IsObject()) { - if (LogtailAlarm::GetInstance()->IsLowLevelAlarmValid()) { + if (AlarmManager::GetInstance()->IsLowLevelAlarmValid()) { LOG_WARNING(sLogger, ("invalid json object, log", buffer)("project", GetContext().GetProjectName())( "logstore", GetContext().GetLogstoreName())("file", logPath)); - LogtailAlarm::GetInstance()->SendAlarm(PARSE_LOG_FAIL_ALARM, + AlarmManager::GetInstance()->SendAlarm(PARSE_LOG_FAIL_ALARM, std::string("invalid json object:") + buffer.to_string(), GetContext().GetProjectName(), GetContext().GetLogstoreName(), diff --git 
a/core/plugin/processor/ProcessorParseTimestampNative.cpp b/core/plugin/processor/ProcessorParseTimestampNative.cpp index 88306a3b6b..2bc5292398 100644 --- a/core/plugin/processor/ProcessorParseTimestampNative.cpp +++ b/core/plugin/processor/ProcessorParseTimestampNative.cpp @@ -149,7 +149,7 @@ bool ProcessorParseTimestampNative::ProcessEvent(StringView logPath, || (BOOL_FLAG(ilogtail_discard_old_data) && (time(NULL) - logTime.tv_sec) > INT32_FLAG(ilogtail_discard_interval))) { if (AppConfig::GetInstance()->IsLogParseAlarmValid()) { - if (LogtailAlarm::GetInstance()->IsLowLevelAlarmValid()) { + if (AlarmManager::GetInstance()->IsLowLevelAlarmValid()) { LOG_WARNING(sLogger, ("drop log event", "log time falls more than " + ToString(INT32_FLAG(ilogtail_discard_interval)) @@ -158,7 +158,7 @@ bool ProcessorParseTimestampNative::ProcessEvent(StringView logPath, "logstore", GetContext().GetLogstoreName())("config", GetContext().GetConfigName())( "file", logPath)); } - LogtailAlarm::GetInstance()->SendAlarm(OUTDATED_LOG_ALARM, + AlarmManager::GetInstance()->SendAlarm(OUTDATED_LOG_ALARM, std::string("logTime: ") + ToString(logTime.tv_sec), GetContext().GetProjectName(), GetContext().GetLogstoreName(), @@ -210,12 +210,12 @@ bool ProcessorParseTimestampNative::ParseLogTime(const StringView& curTimeStr, / } if (NULL == strptimeResult) { if (AppConfig::GetInstance()->IsLogParseAlarmValid()) { - if (LogtailAlarm::GetInstance()->IsLowLevelAlarmValid()) { + if (AlarmManager::GetInstance()->IsLowLevelAlarmValid()) { LOG_WARNING(sLogger, ("parse time fail", curTimeStr)("project", GetContext().GetProjectName())( "logstore", GetContext().GetLogstoreName())("file", logPath)); } - LogtailAlarm::GetInstance()->SendAlarm(PARSE_TIME_FAIL_ALARM, + AlarmManager::GetInstance()->SendAlarm(PARSE_TIME_FAIL_ALARM, curTimeStr.to_string() + " " + mSourceFormat, GetContext().GetProjectName(), GetContext().GetLogstoreName(), diff --git a/core/plugin/processor/ProcessorSPL.h 
b/core/plugin/processor/ProcessorSPL.h index c03a49bfc5..f06241cb6b 100644 --- a/core/plugin/processor/ProcessorSPL.h +++ b/core/plugin/processor/ProcessorSPL.h @@ -16,7 +16,7 @@ #include -#include "monitor/LogtailMetric.h" +#include "monitor/MetricManager.h" #include "pipeline/plugin/interface/Processor.h" namespace apsara::sls::spl { diff --git a/core/plugin/processor/inner/ProcessorMergeMultilineLogNative.cpp b/core/plugin/processor/inner/ProcessorMergeMultilineLogNative.cpp index 754202979d..64ccd57cc1 100644 --- a/core/plugin/processor/inner/ProcessorMergeMultilineLogNative.cpp +++ b/core/plugin/processor/inner/ProcessorMergeMultilineLogNative.cpp @@ -359,7 +359,7 @@ void ProcessorMergeMultilineLogNative::HandleUnmatchLogs( return; } for (size_t i = begin; i <= end; i++) { - if (!mMultiline.mIgnoringUnmatchWarning && LogtailAlarm::GetInstance()->IsLowLevelAlarmValid()) { + if (!mMultiline.mIgnoringUnmatchWarning && AlarmManager::GetInstance()->IsLowLevelAlarmValid()) { StringView sourceVal = logEvents[i].Cast().GetContent(mSourceKey); LOG_WARNING( GetContext().GetLogger(), diff --git a/core/plugin/processor/inner/ProcessorParseContainerLogNative.cpp b/core/plugin/processor/inner/ProcessorParseContainerLogNative.cpp index 6170321736..4f649565ec 100644 --- a/core/plugin/processor/inner/ProcessorParseContainerLogNative.cpp +++ b/core/plugin/processor/inner/ProcessorParseContainerLogNative.cpp @@ -162,13 +162,13 @@ bool ProcessorParseContainerLogNative::ProcessEvent(StringView containerType, mOutFailedEventsTotal->Add(1); } - if (!mIgnoreParseWarning && !errorMsg.empty() && LogtailAlarm::GetInstance()->IsLowLevelAlarmValid()) { + if (!mIgnoreParseWarning && !errorMsg.empty() && AlarmManager::GetInstance()->IsLowLevelAlarmValid()) { LOG_WARNING(sLogger, ("failed to parse log line, errorMsg", errorMsg)("container runtime", containerType)( "processor", sName)("config", mContext->GetConfigName())); errorMsg = "failed to parse log line, error: " + errorMsg + 
"\tcontainer runtime: " + containerType.to_string() + "\tprocessor: " + sName + "\tconfig: " + mContext->GetConfigName(); - LogtailAlarm::GetInstance()->SendAlarm(PARSE_LOG_FAIL_ALARM, + AlarmManager::GetInstance()->SendAlarm(PARSE_LOG_FAIL_ALARM, errorMsg, GetContext().GetProjectName(), GetContext().GetLogstoreName(), diff --git a/core/plugin/processor/inner/ProcessorSplitMultilineLogStringNative.cpp b/core/plugin/processor/inner/ProcessorSplitMultilineLogStringNative.cpp index 0edbac119d..bb09a535cb 100644 --- a/core/plugin/processor/inner/ProcessorSplitMultilineLogStringNative.cpp +++ b/core/plugin/processor/inner/ProcessorSplitMultilineLogStringNative.cpp @@ -341,7 +341,7 @@ void ProcessorSplitMultilineLogStringNative::HandleUnmatchLogs(const StringView& fisrtLogSize = content.size(); } } - if (!mMultiline.mIgnoringUnmatchWarning && LogtailAlarm::GetInstance()->IsLowLevelAlarmValid()) { + if (!mMultiline.mIgnoringUnmatchWarning && AlarmManager::GetInstance()->IsLowLevelAlarmValid()) { LOG_WARNING(mContext->GetLogger(), ("unmatched log string", "please check regex")( "action", UnmatchedContentTreatmentToString(mMultiline.mUnmatchedContentTreatment))( diff --git a/core/prometheus/PromSelfMonitor.cpp b/core/prometheus/PromSelfMonitor.cpp index 5852fc45ae..17f2e701e1 100644 --- a/core/prometheus/PromSelfMonitor.cpp +++ b/core/prometheus/PromSelfMonitor.cpp @@ -4,7 +4,7 @@ #include #include -#include "monitor/LoongCollectorMetricTypes.h" +#include "monitor/MetricTypes.h" #include "monitor/metric_constants/MetricConstants.h" using namespace std; diff --git a/core/prometheus/PromSelfMonitor.h b/core/prometheus/PromSelfMonitor.h index 5c07db5005..002e8f88ef 100644 --- a/core/prometheus/PromSelfMonitor.h +++ b/core/prometheus/PromSelfMonitor.h @@ -4,7 +4,7 @@ #include #include -#include "monitor/LogtailMetric.h" +#include "monitor/MetricManager.h" #include "monitor/PluginMetricManager.h" namespace logtail { diff --git a/core/prometheus/PrometheusInputRunner.h 
b/core/prometheus/PrometheusInputRunner.h index 08e0e15626..89ae961ce1 100644 --- a/core/prometheus/PrometheusInputRunner.h +++ b/core/prometheus/PrometheusInputRunner.h @@ -22,8 +22,8 @@ #include "common/Lock.h" #include "common/timer/Timer.h" -#include "monitor/LogtailMetric.h" -#include "monitor/LoongCollectorMetricTypes.h" +#include "monitor/MetricManager.h" +#include "monitor/MetricTypes.h" #include "prometheus/schedulers/TargetSubscriberScheduler.h" #include "runner/InputRunner.h" #include "sdk/Common.h" diff --git a/core/prometheus/schedulers/ScrapeScheduler.h b/core/prometheus/schedulers/ScrapeScheduler.h index a9a526924d..6a606627f2 100644 --- a/core/prometheus/schedulers/ScrapeScheduler.h +++ b/core/prometheus/schedulers/ScrapeScheduler.h @@ -23,7 +23,7 @@ #include "common/http/HttpResponse.h" #include "common/timer/Timer.h" #include "models/PipelineEventGroup.h" -#include "monitor/LoongCollectorMetricTypes.h" +#include "monitor/MetricTypes.h" #include "pipeline/queue/QueueKey.h" #include "prometheus/Constants.h" #include "prometheus/PromSelfMonitor.h" diff --git a/core/provider/CMakeLists.txt b/core/provider/CMakeLists.txt index b0e0feb6c3..5c3cccbb50 100644 --- a/core/provider/CMakeLists.txt +++ b/core/provider/CMakeLists.txt @@ -18,7 +18,7 @@ project(provider) file(GLOB LIB_SOURCE_FILES *.cpp *.h) set(PROVIDER_SUB_DIRECTORIES_LIST - profile_sender config/feedbacker config/provider config/common_provider protobuf/config_server/v1 protobuf/config_server/v2 + monitor/profile_sender config/feedbacker config/provider config/common_provider protobuf/config_server/v1 protobuf/config_server/v2 ) foreach(DIR_NAME IN LISTS PROVIDER_SUB_DIRECTORIES_LIST) diff --git a/core/provider/Provider.h b/core/provider/Provider.h index 999aaa8a62..90712baf78 100644 --- a/core/provider/Provider.h +++ b/core/provider/Provider.h @@ -17,7 +17,7 @@ #pragma once #include "config/provider/ConfigProvider.h" -#include "profile_sender/ProfileSender.h" +#include 
"monitor/profile_sender/ProfileSender.h" namespace logtail { /* diff --git a/core/runner/FlusherRunner.cpp b/core/runner/FlusherRunner.cpp index c98acd7bf2..1f9f808371 100644 --- a/core/runner/FlusherRunner.cpp +++ b/core/runner/FlusherRunner.cpp @@ -20,7 +20,7 @@ #include "common/StringTools.h" #include "common/http/HttpRequest.h" #include "logger/Logger.h" -#include "monitor/LogtailAlarm.h" +#include "monitor/AlarmManager.h" #include "pipeline/plugin/interface/HttpFlusher.h" #include "pipeline/queue/QueueKeyManager.h" #include "pipeline/queue/SenderQueueItem.h" diff --git a/core/runner/FlusherRunner.h b/core/runner/FlusherRunner.h index e8fbf23e0f..e23856aed9 100644 --- a/core/runner/FlusherRunner.h +++ b/core/runner/FlusherRunner.h @@ -20,7 +20,7 @@ #include #include -#include "monitor/LogtailMetric.h" +#include "monitor/MetricManager.h" #include "pipeline/plugin/interface/Flusher.h" #include "pipeline/queue/SenderQueueItem.h" #include "runner/sink/SinkType.h" diff --git a/core/runner/ProcessorRunner.cpp b/core/runner/ProcessorRunner.cpp index 7a0963d542..37e41da27b 100644 --- a/core/runner/ProcessorRunner.cpp +++ b/core/runner/ProcessorRunner.cpp @@ -20,7 +20,7 @@ #include "go_pipeline/LogtailPlugin.h" #include "models/EventPool.h" #include "monitor/LogFileProfiler.h" -#include "monitor/LogtailAlarm.h" +#include "monitor/AlarmManager.h" #include "monitor/metric_constants/MetricConstants.h" #include "pipeline/PipelineManager.h" #include "queue/ExactlyOnceQueueManager.h" diff --git a/core/runner/ProcessorRunner.h b/core/runner/ProcessorRunner.h index 4ce4d37988..6841569edc 100644 --- a/core/runner/ProcessorRunner.h +++ b/core/runner/ProcessorRunner.h @@ -23,7 +23,7 @@ #include #include "models/PipelineEventGroup.h" -#include "monitor/LogtailMetric.h" +#include "monitor/MetricManager.h" #include "pipeline/queue/QueueKey.h" namespace logtail { diff --git a/core/runner/sink/http/HttpSink.h b/core/runner/sink/http/HttpSink.h index 939680fdfe..ad788adee8 100644 --- 
a/core/runner/sink/http/HttpSink.h +++ b/core/runner/sink/http/HttpSink.h @@ -25,7 +25,7 @@ #include "runner/sink/Sink.h" #include "runner/sink/http/HttpSinkRequest.h" -#include "monitor/LogtailMetric.h" +#include "monitor/MetricManager.h" namespace logtail { diff --git a/core/unittest/config/ConfigUpdatorUnittest.cpp b/core/unittest/config/ConfigUpdatorUnittest.cpp index 54d49ae32c..13e4846bd3 100644 --- a/core/unittest/config/ConfigUpdatorUnittest.cpp +++ b/core/unittest/config/ConfigUpdatorUnittest.cpp @@ -33,7 +33,7 @@ #include "file_server/event_handler/LogInput.h" #include "Sender.h" #include "sls_logs.pb.h" -#include "LogtailAlarm.h" +#include "AlarmManager.h" #include "common/Flags.h" #include "common/Lock.h" #include "constants/Constants.h" diff --git a/core/unittest/input/InputNetworkObserverUnittest.cpp b/core/unittest/input/InputNetworkObserverUnittest.cpp index a53bf43264..f6e4c7b6ef 100644 --- a/core/unittest/input/InputNetworkObserverUnittest.cpp +++ b/core/unittest/input/InputNetworkObserverUnittest.cpp @@ -152,7 +152,6 @@ void InputNetworkObserverUnittest::OnSuccessfulStart() { unique_ptr input; Json::Value configJson, optionalGoPipeline; string configStr, errorMsg; - uint32_t pluginIdx = 0; configStr = R"( { diff --git a/core/unittest/monitor/CMakeLists.txt b/core/unittest/monitor/CMakeLists.txt index 12577fc957..aa6544d052 100644 --- a/core/unittest/monitor/CMakeLists.txt +++ b/core/unittest/monitor/CMakeLists.txt @@ -13,14 +13,14 @@ # limitations under the License. 
cmake_minimum_required(VERSION 3.22) -project(logtail_metric_unittest) +project(metric_manager_unittest) -add_executable(logtail_metric_unittest LogtailMetricUnittest.cpp) -target_link_libraries(logtail_metric_unittest ${UT_BASE_TARGET}) +add_executable(metric_manager_unittest MetricManagerUnittest.cpp) +target_link_libraries(metric_manager_unittest ${UT_BASE_TARGET}) add_executable(plugin_metric_manager_unittest PluginMetricManagerUnittest.cpp) target_link_libraries(plugin_metric_manager_unittest ${UT_BASE_TARGET}) include(GoogleTest) -gtest_discover_tests(logtail_metric_unittest) +gtest_discover_tests(metric_manager_unittest) gtest_discover_tests(plugin_metric_manager_unittest) diff --git a/core/unittest/monitor/LogtailMetricUnittest.cpp b/core/unittest/monitor/MetricManagerUnittest.cpp similarity index 95% rename from core/unittest/monitor/LogtailMetricUnittest.cpp rename to core/unittest/monitor/MetricManagerUnittest.cpp index 30d5822e47..3625105227 100644 --- a/core/unittest/monitor/LogtailMetricUnittest.cpp +++ b/core/unittest/monitor/MetricManagerUnittest.cpp @@ -18,7 +18,7 @@ #include #include #include -#include "LogtailMetric.h" +#include "MetricManager.h" #include "MetricExportor.h" #include "MetricConstants.h" @@ -28,7 +28,7 @@ namespace logtail { static std::atomic_bool running(true); -class ILogtailMetricUnittest : public ::testing::Test { +class MetricManagerUnittest : public ::testing::Test { public: void SetUp() {} @@ -42,12 +42,12 @@ class ILogtailMetricUnittest : public ::testing::Test { void TestCreateAndDeleteMetric(); }; -APSARA_UNIT_TEST_CASE(ILogtailMetricUnittest, TestCreateMetricAutoDelete, 0); -APSARA_UNIT_TEST_CASE(ILogtailMetricUnittest, TestCreateMetricAutoDeleteMultiThread, 1); -APSARA_UNIT_TEST_CASE(ILogtailMetricUnittest, TestCreateAndDeleteMetric, 2); +APSARA_UNIT_TEST_CASE(MetricManagerUnittest, TestCreateMetricAutoDelete, 0); +APSARA_UNIT_TEST_CASE(MetricManagerUnittest, TestCreateMetricAutoDeleteMultiThread, 1); 
+APSARA_UNIT_TEST_CASE(MetricManagerUnittest, TestCreateAndDeleteMetric, 2); -void ILogtailMetricUnittest::TestCreateMetricAutoDelete() { +void MetricManagerUnittest::TestCreateMetricAutoDelete() { std::vector> labels; labels.emplace_back(std::make_pair("project", "project1")); labels.emplace_back(std::make_pair("logstore", "logstore1")); @@ -150,7 +150,7 @@ void createMetrics(int count) { } } -void ILogtailMetricUnittest::TestCreateMetricAutoDeleteMultiThread() { +void MetricManagerUnittest::TestCreateMetricAutoDeleteMultiThread() { std::thread t1(createMetrics, 1); std::thread t2(createMetrics, 2); std::thread t3(createMetrics, 3); @@ -199,7 +199,7 @@ void ILogtailMetricUnittest::TestCreateMetricAutoDeleteMultiThread() { } -void ILogtailMetricUnittest::TestCreateAndDeleteMetric() { +void MetricManagerUnittest::TestCreateAndDeleteMetric() { std::thread t1(createMetrics, 1); std::thread t2(createMetrics, 2); diff --git a/core/unittest/polling/PollingUnittest.cpp b/core/unittest/polling/PollingUnittest.cpp index 3efdc1700b..fe273688bd 100644 --- a/core/unittest/polling/PollingUnittest.cpp +++ b/core/unittest/polling/PollingUnittest.cpp @@ -45,7 +45,7 @@ #include #include "protobuf/sls/metric.pb.h" #include "protobuf/sls/sls_logs.pb.h" -#include "monitor/LogtailAlarm.h" +#include "monitor/AlarmManager.h" #include "file_server/event_handler/LogInput.h" #include "common/FileEncryption.h" #include "common/FileSystemUtil.h" diff --git a/core/unittest/sender/SenderUnittest.cpp b/core/unittest/sender/SenderUnittest.cpp index 44f14724aa..e2ac680333 100644 --- a/core/unittest/sender/SenderUnittest.cpp +++ b/core/unittest/sender/SenderUnittest.cpp @@ -58,7 +58,7 @@ #include "file_server/event_handler/LogInput.h" #include "logger/Logger.h" #include "monitor/LogIntegrity.h" -#include "monitor/LogtailAlarm.h" +#include "monitor/AlarmManager.h" #include "protobuf/sls/metric.pb.h" #include "protobuf/sls/sls_logs.pb.h" #include "runner/ProcessorRunner.h" From 
825261612c395403593a351018ac3612bed8edb3 Mon Sep 17 00:00:00 2001 From: bilosikia Date: Wed, 13 Nov 2024 15:43:02 +0800 Subject: [PATCH 10/10] feat: add configer provider info report interface (#1871) --- config_server/protocol/v2/README.md | 121 ++++++++++++++---- config_server/protocol/v2/agentV2.proto | 55 +++++--- .../common_provider/CommonConfigProvider.cpp | 8 +- 3 files changed, 137 insertions(+), 47 deletions(-) diff --git a/config_server/protocol/v2/README.md b/config_server/protocol/v2/README.md index 567151584b..53e096f2bd 100644 --- a/config_server/protocol/v2/README.md +++ b/config_server/protocol/v2/README.md @@ -18,11 +18,11 @@ bytes instance_id = 4; // Required, Agent's unique identification, consistent throughout the process lifecycle string agent_type = 5; // Required, Agent's type(ilogtail, ..) AgentAttributes attributes = 6; // Agent's basic attributes - repeated AgentGroupTag tags = 7; // Agent's tags + repeated AgentGroupTag tags = 7; // Agent's tags string running_status = 8; // Human readable running status int64 startup_time = 9; // Required, Agent's startup time repeated ConfigInfo pipeline_configs = 10; // Information about the current PIPELINE_CONFIG held by the Agent - repeated ConfigInfo instance_configs = 11; // Information about the current AGENT_CONFIG held by the Agent + repeated ConfigInfo instance_configs = 11; // Information about the current AGENT_CONFIG held by the Agent repeated CommandInfo custom_commands = 12; // Information about command history uint64 flags = 13; // Predefined command flag bytes opaque = 14; // Opaque data for extension @@ -52,7 +52,7 @@ int64 version = 2; // Required, Config's version number or hash code ConfigStatus status = 3; // Config's status string message = 4; // Optional error message - map extra = 5; // Optional extra info + map extra = 5; // Optional extra info } // Define the Command information carried in the request @@ -104,7 +104,7 @@ uint64 capabilities = 3; // Bitmask of flags defined by 
ServerCapabilities enum repeated ConfigDetail pipeline_config_updates = 4; // Agent's pipeline config update status - repeated ConfigDetail instance_config_updates = 5; // Agent's instance config update status + repeated ConfigDetail instance_config_updates = 5; // Agent's instance config update status repeated CommandDetail custom_command_updates = 6; // Agent's commands updates uint64 flags = 7; // Predefined command flag bytes opaque = 8; // Opaque data for extension @@ -127,13 +127,13 @@ enum ServerCapabilities { // The capabilities field is unspecified. - UnspecifiedServerCapability = 0; + UnspecifiedServerCapability = 0; // The Server can remember agent attributes. RembersAttribute = 0x00000001; // The Server can remember pipeline config status. RembersPipelineConfigStatus = 0x00000002; // The Server can remember instance config status. - RembersInstanceConfigStatus = 0x00000004; + RembersInstanceConfigStatus = 0x00000004; // The Server can remember custom command status. RembersCustomCommandStatus = 0x00000008; @@ -141,8 +141,8 @@ } message ServerErrorResponse { - int32 error_code = 1; // None-zero value indicates error - string error_message = 2; // Error message + int32 error_code = 1; // None-zero value indicates error + string error_message = 2; // Error message } enum ResponseFlags { @@ -155,11 +155,55 @@ // optimization) but the Server detects that it does not have it (e.g. was // restarted and lost state). ReportFullState = 0x00000001; + // FetchPipelineConfigDetail can be used by the Server to tell Agent to fetch config details by FetchConfig api, + // HB response ConfigDetail will not contains details. FetchPipelineConfigDetail = 0x00000002; - FetchInstanceConfigDetail = 0x00000004; + // like FetchPipelineConfigDetail, but for instance config. 
+ FetchInstanceConfigDetail = 0x00000004; // bits before 2^16 (inclusive) are reserved for future official fields } +### [Optional] FetchConfigRequest 消息 + +额外的 config 拉取接口,不通过心跳返回 config 详情。 + + message FetchConfigRequest { + bytes request_id = 1; + bytes instance_id = 2; // Agent's unique identification + repeated ConfigInfo pipeline_configs = 3; // Information about the current PIPELINE_CONFIG held by the Agent + repeated ConfigInfo instance_configs = 4; // Information about the current AGENT_CONFIG held by the Agent + repeated CommandInfo custom_commands = 5; // Information about command history + } + +### [Optional] FetchConfigResponse 消息 + + message FetchConfigResponse { + bytes request_id = 1; + CommonResponse commonResponse = 2; + repeated ConfigDetail pipeline_config_updates = 3; // Agent's pipeline config with details + repeated ConfigDetail instance_config_updates = 4; // Agent's instance config with details + repeated CommandDetail custom_command_updates = 5; // Agent's commands details + } + +### [Optional] ReportStatusRequest 消息 + +额外的 config 应用状态上报接口,不依赖于等到下次心跳上报。适用于心跳和 config 状态服务拆分的实现。 + + message ReportStatusRequest { + bytes request_id = 1; + bytes instance_id = 2; // Agent's unique identification + repeated ConfigInfo pipeline_configs = 3; // status about the current PIPELINE_CONFIG held by the Agent + repeated ConfigInfo instance_configs = 4; // status about the current AGENT_CONFIG held by the Agent + repeated CommandInfo custom_commands = 5; // status about command history + } + +### [Optional] ReportStatusResponse 消息 + + message ReportStatusResponse { + bytes request_id = 1; + CommonResponse commonResponse = 2; + } + ## 行为规范 对于管控协议来说 iLogtail 的预期行为是确定性的,对于实现本管控协议的其他 Agent 其具体行为可自行确定,但语义应保持一致。Server 端定义了可选的行为的不同实现,此时对于这些差异 Agent 侧在实现时必须都考虑到且做好兼容。这样,Agent只需要实现一个CommonConfigProvider就可以受任意符合此协议规范的ConfigServer管控。 @@ -174,7 +218,7 @@ Server:应当通过capbilitiies上报Server自身的能力,这样如果新 
Client:Agent启动后第一次向Server汇报全量信息,request字段应填尽填。request\_id、sequence\_num、capabilities、instance\_id、agent\_type、startup\_time为必填字段。 -Server:Server根据上报的信息返回响应。pipeline\_config\_updates、instance\_config\_updates中包含agent需要同步的配置,updates中必然包含name和version,是否包含detail取决于server端实现。custom\_command_updates包含要求agent执行的命令command中必然包含type、name和expire\_time。 +Server:Server根据上报的信息返回响应。pipeline\_config\_updates、instance\_config\_updates中包含agent需要同步的配置,updates中必然包含name和version,是否包含detail取决于server端实现, 如果不包含则需要通过 FetchConfig 拉取。custom\_command_updates包含要求agent执行的命令command中必然包含type、name和expire\_time。 Server是否保存Client信息取决于Server实现,如果服务端找不到或保存的sequence\_num + 1 ≠ 心跳的sequence\_num,那么就立刻返回并且flags中必须设置ReportFullStatus标识位。 @@ -198,45 +242,68 @@ Server:同注册 ### 进程配置 -若Server的注册/心跳响应中有instance\_config\_updates.detail - -Client:直接从response中获得detail,应用成功后下次心跳需要上报完整状态。 +可选两种实现: +1. 在心跳中完成进程配置的状态上报与同步。 -若Server的响应不包含detail + Server的注册/心跳响应中有instance\_config\_updates.detail,client 直接从response中获得detail,应用成功后下次心跳需要上报完整状态。 + +2. 在心跳中完成进程配置的基础信息同步,通过额外的接口完成进程配置的拉取。 -Client:根据instance\_config\_updates的信息构造FetchInstanceConfigRequest + Server的响应不包含detail, 只包含要更新的进程配置 name 和 version。client 比较本地的配置和 version 判断需要更新后,根据 instance_config_updates 的信息构造 FetchConfigRequest 后进行一次额外拉取。FetchConfigRequest 至少需要包括 name 和 version。 -Server:返回FetchInstanceConfigResponse + 心跳 response flag 需要设置 FetchInstanceConfigDetail. Client获取到多个进程配置时,自动合并,若产生冲突默认行为是未定义。 ### 采集配置 -若Server的注册/心跳响应中有pipeline\_config\_updates.detail +可选两种实现: +1. 在心跳中完成采集配置的状态上报与同步。 -Client:直接从response中获得detail,应用成功后下次心跳需要上报完整状态。 + Server的注册/心跳响应中有pipeline\_config\_updates.detail, Client 直接从response中获得detail,应用成功后下次心跳需要上报完整状态。 -若Server的响应不包含detail +2. 
在心跳中完成采集配置的基础信息同步,通过额外的接口完成进程配置的拉取。 -Client:根据pipeline\_config\_updates的信息构造FetchPipelineConfigRequest + Server的响应不包含detail, 只包含要更新的采集配置 name 和 version。client 比较本地的配置和 version 判断需要更新后,根据 pipeline_config_updates 的信息构造 FetchConfigRequest 后进行一次额外拉取。FetchConfigRequest 至少需要包括 name 和 version。 -Server:返回FetchPipelineConfigResponse + 心跳 response flag 需要设置 FetchPipelineConfigDetail. -客户端支持以下2种实现 +客户端以下2种实现 -实现1:直接将Detail返回在心跳响应中(FetchConfigDetail flag is unset) +实现1:直接将Detail返回在心跳响应中(FetchPipelineConfigDetail flag is unset) ![image](https://github.com/alibaba/ilogtail/assets/1827594/be645615-dd99-42dd-9deb-681e9a4069bb) -实现2:仅返回配置名和版本,Detail使用单独请求获取(FetchConfigDetail flag is set) +实现2:仅返回配置名和版本,Detail使用单独请求获取(FetchPipelineConfigDetail flag is set) ![image](https://github.com/alibaba/ilogtail/assets/1827594/c409c35c-2a81-4927-bfd2-7fb321ef1ca8) ### 配置状态上报 -Client:这个版本的配置状态上报中修改了version的定义,-1仍然表示删除,0作为保留值,其他值都是合法version,只要version不同Client都应该视为配置更新。此外参考OpAMP增加了配置应用状态上报的字段,能反应出下发的配置是否生效。 +不管是进程配置还是采集配置,下发的配置需要上报应用状态。 + +对于 Client:这个版本的配置状态上报中修改了version的定义,-1仍然表示删除,0作为保留值,其他值都是合法version,只要version不同Client都应该视为配置更新。此外参考OpAMP增加了配置应用状态上报的字段,能反应出下发的配置是否生效。 + +对于 Server:这些信息是Agent状态的一部分,可选保存。与通过Event上报可观测信息不同的是,作为状态信息没有时间属性,用户可通过接口可获取即刻状态,而不需要选择时间窗口合并事件。 + +同进程配置和采集配置,上报配置状态也有两种可选实现: +1. 在心跳 request 中将配置最新状态带上。 + + 在心跳中将进程配置和采集配置的最新版本和状态一起上报。另外按照心跳协议的定义,配置状态变更后,要求在心跳一定要上报配置最新状态,如果相较于上一次心跳配置状态无变化,则不要求。 + +2. 通过 ReportStatus 接口上报。适合对状态更新实时性要求比较高,或对心跳服务、配置服务、状态服务拆分以减少故障半径的实现。 + + 通过 ReportStatus 额外接口去上报,能够在一定程度上减少心跳服务的复杂度,有利于状态服务和心跳服务的拆分。ReportStatus 接口不用等到下一次心跳,在配置状态发生变化即可上报。 + +### 心跳配置拉取/上报与额外接口拉取/上报选择 +配置状态上报的方式应该和配置拉取方式配套使用: +1. 如果进程配置和采集配置都通过心跳下发,状态配置也仅应该通过心跳上报。 +2. 
如果进程配置和采集配置都通过 FetchConfig 接口拉取,状态上报也应该通过 ReportStatus 上报。 + +虽然其他的组合方式也能完成配置的下发和状态上报,但会导致服务拆分和服务职责不清晰的问题。如无特殊要求,建议通过心跳完成配置下发和上报。 -Server:这些信息是Agent状态的一部分,可选保存。与通过Event上报可观测信息不同的是,作为状态信息没有时间属性,用户可通过接口可获取即刻状态,而不需要选择时间窗口合并事件。 +通过额外接口拉取配置和状态上报流程: +![image](https://github.com/user-attachments/assets/07224fe6-9454-4fcb-9c56-2a46eee33f0b) ### 预定义命令 @@ -259,4 +326,4 @@ Client: 当HeartbeatResponse中的code为0时,Agent应该正常处理下发的 ### 辅助信息 在command\_info, command\_detail, config\_info, config\_detail中,都预留了extra字段,可以用于传递一些额外的用户自定义的辅助信息。\ -注意:extra字段仅作传递辅助信息使用,不会对管控行为造成任何影响。 \ No newline at end of file +注意:extra字段仅作传递辅助信息使用,不会对管控行为造成任何影响。 diff --git a/config_server/protocol/v2/agentV2.proto b/config_server/protocol/v2/agentV2.proto index bc99338d58..649cd433da 100644 --- a/config_server/protocol/v2/agentV2.proto +++ b/config_server/protocol/v2/agentV2.proto @@ -25,7 +25,7 @@ message ConfigInfo { int64 version = 2; // Required, Config's version number or hash code ConfigStatus status = 3; // Config's status string message = 4; // Optional error message - map extra = 5; // Optional extra info + map extra = 5; // Optional extra info } // Define the Command information carried in the request @@ -34,7 +34,7 @@ message CommandInfo { string name = 2; // Required, Command's unique identification ConfigStatus status = 3; // Command's status string message = 4; // Optional error message - map extra = 5; // Optional extra info + map extra = 5; // Optional extra info } // Define Agent's basic attributes @@ -49,11 +49,11 @@ message AgentAttributes { enum AgentCapabilities { // The capabilities field is unspecified. - UnspecifiedAgentCapability = 0; + UnspecifiedAgentCapability = 0; // The Agent can accept pipeline configuration from the Server. AcceptsPipelineConfig = 0x00000001; // The Agent can accept instance configuration from the Server. - AcceptsInstanceConfig = 0x00000002; + AcceptsInstanceConfig = 0x00000002; // The Agent can accept custom command from the Server. 
AcceptsCustomCommand = 0x00000004; @@ -80,7 +80,7 @@ message HeartbeatRequest { bytes instance_id = 4; // Required, Agent's unique identification, consistent throughout the process lifecycle string agent_type = 5; // Required, Agent's type(ilogtail, ..) AgentAttributes attributes = 6; // Agent's basic attributes - repeated AgentGroupTag tags = 7; // Agent's tags + repeated AgentGroupTag tags = 7; // Agent's tags string running_status = 8; // Human readable running status int64 startup_time = 9; // Required, Agent's startup time repeated ConfigInfo pipeline_configs = 10; // Information about the current PIPELINE_CONFIG held by the Agent @@ -96,7 +96,7 @@ message ConfigDetail { string name = 1; // Required, Config's unique identification int64 version = 2; // Required, Config's version number or hash code bytes detail = 3; // Required, Config's detail - map extra = 4; // Optional extra info + map extra = 4; // Optional extra info } message CommandDetail { @@ -109,13 +109,13 @@ message CommandDetail { enum ServerCapabilities { // The capabilities field is unspecified. - UnspecifiedServerCapability = 0; + UnspecifiedServerCapability = 0; // The Server can remember agent attributes. RembersAttribute = 0x00000001; // The Server can remember pipeline config status. RembersPipelineConfigStatus = 0x00000002; // The Server can remember instance config status. - RembersInstanceConfigStatus = 0x00000004; + RembersInstanceConfigStatus = 0x00000004; // The Server can remember custom command status. RembersCustomCommandStatus = 0x00000008; @@ -132,7 +132,10 @@ enum ResponseFlags { // optimization) but the Server detects that it does not have it (e.g. was // restarted and lost state). ReportFullState = 0x00000001; + // FetchPipelineConfigDetail can be used by the Server to tell Agent to fetch config details by FetchConfig api, + // HB response ConfigDetail will not contains details. 
FetchPipelineConfigDetail = 0x00000002; + // like FetchPipelineConfigDetail, but for instance config. FetchInstanceConfigDetail = 0x00000004; // bits before 2^16 (inclusive) are reserved for future official fields } @@ -150,24 +153,44 @@ message HeartbeatResponse { bytes opaque = 8; // Opaque data for extension } -// API: /Agent/FetchPipelineConfig/ -// API: /Agent/FetchInstanceConfig/ -// Agent request to ConfigServer, pulling details of the config +// API: /Agent/FetchConfig +// optional api for fetching configs details, but not by heartbeat response with config details, see README. message FetchConfigRequest { bytes request_id = 1; - bytes instance_id = 2; // Agent's unique identification - repeated ConfigInfo req_configs = 3; // Config's name and version/hash + bytes instance_id = 2; // Agent's unique identification + repeated ConfigInfo pipeline_configs = 3; // Information about the current PIPELINE_CONFIG held by the Agent + repeated ConfigInfo instance_configs = 4; // Information about the current AGENT_CONFIG held by the Agent + repeated CommandInfo custom_commands = 5; // Information about command history } -// ConfigServer response to Agent's request +// ConfigServer response to Agent's config fetching request message FetchConfigResponse { bytes request_id = 1; CommonResponse commonResponse = 2; - repeated ConfigDetail config_details = 3; // config detail + repeated ConfigDetail pipeline_config_updates = 3; // Agent's pipeline config with details + repeated ConfigDetail instance_config_updates = 4; // Agent's instance config with details + repeated CommandDetail custom_command_updates = 5; // Agent's commands details +} + +// API: /Agent/ReportStatus +// optional api for report config status, but not wait util next heartbeat, see README. +// if HB server and Status server are different service, this api may be help. 
+message ReportStatusRequest { + bytes request_id = 1; + bytes instance_id = 2; // Agent's unique identification + repeated ConfigInfo pipeline_configs = 3; // status about the current PIPELINE_CONFIG held by the Agent + repeated ConfigInfo instance_configs = 4; // status about the current AGENT_CONFIG held by the Agent + repeated CommandInfo custom_commands = 5; // status about command history +} + +// ConfigServer response to Agent's report status request +message ReportStatusResponse { + bytes request_id = 1; + CommonResponse commonResponse = 2; } message CommonResponse { int32 status = 1; bytes errorMessage = 2; -} \ No newline at end of file +} diff --git a/core/config/common_provider/CommonConfigProvider.cpp b/core/config/common_provider/CommonConfigProvider.cpp index 4551e2a127..1a5edcfd31 100644 --- a/core/config/common_provider/CommonConfigProvider.cpp +++ b/core/config/common_provider/CommonConfigProvider.cpp @@ -509,7 +509,7 @@ bool CommonConfigProvider::FetchInstanceConfigFromServer( fetchConfigRequest.set_request_id(requestID); fetchConfigRequest.set_instance_id(GetInstanceId()); for (const auto& config : heartbeatResponse.instance_config_updates()) { - auto reqConfig = fetchConfigRequest.add_req_configs(); + auto reqConfig = fetchConfigRequest.add_instance_configs(); reqConfig->set_name(config.name()); reqConfig->set_version(config.version()); } @@ -522,7 +522,7 @@ bool CommonConfigProvider::FetchInstanceConfigFromServer( operation, reqBody, "FetchInstanceConfig", fetchConfigRequest.request_id(), fetchConfigResponse)) { configserver::proto::v2::FetchConfigResponse fetchConfigResponsePb; fetchConfigResponsePb.ParseFromString(fetchConfigResponse); - res.Swap(fetchConfigResponsePb.mutable_config_details()); + res.Swap(fetchConfigResponsePb.mutable_instance_config_updates()); return true; } return false; @@ -536,7 +536,7 @@ bool CommonConfigProvider::FetchPipelineConfigFromServer( fetchConfigRequest.set_request_id(requestID); 
fetchConfigRequest.set_instance_id(GetInstanceId()); for (const auto& config : heartbeatResponse.pipeline_config_updates()) { - auto reqConfig = fetchConfigRequest.add_req_configs(); + auto reqConfig = fetchConfigRequest.add_pipeline_configs(); reqConfig->set_name(config.name()); reqConfig->set_version(config.version()); } @@ -549,7 +549,7 @@ bool CommonConfigProvider::FetchPipelineConfigFromServer( operation, reqBody, "FetchPipelineConfig", fetchConfigRequest.request_id(), fetchConfigResponse)) { configserver::proto::v2::FetchConfigResponse fetchConfigResponsePb; fetchConfigResponsePb.ParseFromString(fetchConfigResponse); - res.Swap(fetchConfigResponsePb.mutable_config_details()); + res.Swap(fetchConfigResponsePb.mutable_pipeline_config_updates()); return true; } return false;