From ac577d5db252b7bdd2ef89a5f475e48da8a357c5 Mon Sep 17 00:00:00 2001 From: Ryan Yin Date: Fri, 22 Nov 2024 09:53:13 +0800 Subject: [PATCH] nixos/victoriametrics: check config, more tests, update desc --- .../services/databases/victoriametrics.nix | 10 +- nixos/modules/services/monitoring/vmagent.nix | 55 ++++-- nixos/modules/services/monitoring/vmalert.nix | 10 +- nixos/tests/all-tests.nix | 2 +- nixos/tests/victoriametrics.nix | 41 ---- nixos/tests/victoriametrics/default.nix | 10 + nixos/tests/victoriametrics/remote-write.nix | 103 ++++++++++ nixos/tests/victoriametrics/vmalert.nix | 179 ++++++++++++++++++ 8 files changed, 351 insertions(+), 59 deletions(-) delete mode 100644 nixos/tests/victoriametrics.nix create mode 100644 nixos/tests/victoriametrics/default.nix create mode 100644 nixos/tests/victoriametrics/remote-write.nix create mode 100644 nixos/tests/victoriametrics/vmalert.nix diff --git a/nixos/modules/services/databases/victoriametrics.nix b/nixos/modules/services/databases/victoriametrics.nix index b26ad47c9a1368..075e6a68fc3fa0 100644 --- a/nixos/modules/services/databases/victoriametrics.nix +++ b/nixos/modules/services/databases/victoriametrics.nix @@ -31,7 +31,15 @@ let in { options.services.victoriametrics = { - enable = mkEnableOption "VictoriaMetrics is a fast, cost-effective and scalable monitoring solution and time series database."; + enable = lib.mkOption { + type = lib.types.bool; + default = false; + description = '' + Whether to enable VictoriaMetrics in single-node mode. + + VictoriaMetrics is a fast, cost-effective and scalable monitoring solution and time series database. 
+ ''; + }; package = mkPackageOption pkgs "victoriametrics" { }; listenAddress = mkOption { diff --git a/nixos/modules/services/monitoring/vmagent.nix b/nixos/modules/services/monitoring/vmagent.nix index 4838e0709d09e3..0dc6c8f0f79066 100644 --- a/nixos/modules/services/monitoring/vmagent.nix +++ b/nixos/modules/services/monitoring/vmagent.nix @@ -2,7 +2,32 @@ let cfg = config.services.vmagent; - settingsFormat = pkgs.formats.json { }; + settingsFormat = pkgs.formats.yaml {}; + + startCLIList = + [ + "${cfg.package}/bin/vmagent" + ] + ++ lib.optionals (cfg.remoteWrite.url != null) [ + "-remoteWrite.url=${cfg.remoteWrite.url}" + "-remoteWrite.tmpDataPath=%C/vmagent/remote_write_tmp" + ] + ++ lib.optional ( + cfg.remoteWrite.basicAuthUsername != null + ) "-remoteWrite.basicAuth.username=${cfg.remoteWrite.basicAuthUsername}" + ++ lib.optional ( + cfg.remoteWrite.basicAuthPasswordFile != null + ) "-remoteWrite.basicAuth.passwordFile=\${CREDENTIALS_DIRECTORY}/remote_write_basic_auth_password" + ++ cfg.extraArgs; + prometheusConfigYml = checkedConfig ( + settingsFormat.generate "prometheusConfig.yaml" cfg.prometheusConfig + ); + + checkedConfig = file: + pkgs.runCommand "checked-config" {nativeBuildInputs = [cfg.package];} '' + ln -s ${file} $out + ${lib.escapeShellArgs startCLIList} -promscrape.config=${file} -dryRun + ''; in { imports = [ (lib.mkRemovedOptionModule [ "services" "vmagent" "dataDir" ] "dataDir has been deprecated in favor of systemd provided CacheDirectory") @@ -12,7 +37,15 @@ in { ]; options.services.vmagent = { - enable = lib.mkEnableOption "vmagent"; + enable = lib.mkOption { + type = lib.types.bool; + default = false; + description = '' + Whether to enable VictoriaMetrics's `vmagent`. 
+ + vmagent efficiently scrapes metrics from Prometheus-compatible exporters + ''; + }; package = lib.mkPackageOption pkgs "vmagent" { }; @@ -69,18 +102,7 @@ in { config = lib.mkIf cfg.enable { networking.firewall.allowedTCPPorts = lib.mkIf cfg.openFirewall [ 8429 ]; - systemd.services.vmagent = let - prometheusConfig = settingsFormat.generate "prometheusConfig.yaml" cfg.prometheusConfig; - startCommandLine = lib.concatStringsSep " " ([ - "${cfg.package}/bin/vmagent" - "-promscrape.config=${prometheusConfig}" - ] ++ cfg.extraArgs - ++ lib.optionals (cfg.remoteWrite.url != null) [ - "-remoteWrite.url=${cfg.remoteWrite.url}" - "-remoteWrite.tmpDataPath=%C/vmagent/remote_write_tmp" - ] ++ lib.optional (cfg.remoteWrite.basicAuthUsername != null) "-remoteWrite.basicAuth.username=${cfg.remoteWrite.basicAuthUsername}" - ++ lib.optional (cfg.remoteWrite.basicAuthPasswordFile != null) "-remoteWrite.basicAuth.passwordFile=\${CREDENTIALS_DIRECTORY}/remote_write_basic_auth_password"); - in { + systemd.services.vmagent = { wantedBy = [ "multi-user.target" ]; after = [ "network.target" ]; description = "vmagent system service"; @@ -91,7 +113,10 @@ in { Type = "simple"; Restart = "on-failure"; CacheDirectory = "vmagent"; - ExecStart = startCommandLine; + ExecStart = lib.escapeShellArgs ( + startCLIList + ++ lib.optionals (cfg.prometheusConfig != null) ["-promscrape.config=${prometheusConfigYml}"] + ); LoadCredential = lib.optional (cfg.remoteWrite.basicAuthPasswordFile != null) [ "remote_write_basic_auth_password:${cfg.remoteWrite.basicAuthPasswordFile}" ]; diff --git a/nixos/modules/services/monitoring/vmalert.nix b/nixos/modules/services/monitoring/vmalert.nix index 65db6fab77db6e..14fdfbbda29acb 100644 --- a/nixos/modules/services/monitoring/vmalert.nix +++ b/nixos/modules/services/monitoring/vmalert.nix @@ -20,7 +20,15 @@ in { # interface options.services.vmalert = { - enable = mkEnableOption "vmalert"; + enable = lib.mkOption { + type = lib.types.bool; + default = false; + 
+ description = '' + Whether to enable VictoriaMetrics's `vmalert`. + + `vmalert` evaluates alerting and recording rules against a data source and sends notifications via Alertmanager. + ''; + }; package = mkPackageOption pkgs "victoriametrics" { }; diff --git a/nixos/tests/all-tests.nix b/nixos/tests/all-tests.nix index 14a1439eeae7f8..6d356292a5356d 100644 --- a/nixos/tests/all-tests.nix +++ b/nixos/tests/all-tests.nix @@ -1105,7 +1105,7 @@ in { vaultwarden = discoverTests (import ./vaultwarden.nix); vector = handleTest ./vector {}; vengi-tools = handleTest ./vengi-tools.nix {}; - victoriametrics = handleTest ./victoriametrics.nix {}; + victoriametrics = handleTest ./victoriametrics {}; vikunja = handleTest ./vikunja.nix {}; virtualbox = handleTestOn ["x86_64-linux"] ./virtualbox.nix {}; vscode-remote-ssh = handleTestOn ["x86_64-linux"] ./vscode-remote-ssh.nix {}; diff --git a/nixos/tests/victoriametrics.nix b/nixos/tests/victoriametrics.nix deleted file mode 100644 index e45d0a30f3a6f8..00000000000000 --- a/nixos/tests/victoriametrics.nix +++ /dev/null @@ -1,41 +0,0 @@ -# This test runs victoriametrics and checks if victoriametrics is able to write points and run simple query - -import ./make-test-python.nix ( - { pkgs, ... }: - { - name = "victoriametrics"; - meta = with pkgs.lib.maintainers; { - maintainers = [ - yorickvp - ryan4yin - ]; - }; - - nodes = { - one = - { ... 
}: - { - services.victoriametrics.enable = true; - }; - }; - - testScript = '' - start_all() - - one.wait_for_unit("victoriametrics.service") - - # write some points and run simple query - out = one.succeed( - "curl -f -d 'measurement,tag1=value1,tag2=value2 field1=123,field2=1.23' -X POST 'http://localhost:8428/write'" - ) - cmd = ( - """curl -f -s -G 'http://localhost:8428/api/v1/export' -d 'match={__name__!=""}'""" - ) - # data takes a while to appear - one.wait_until_succeeds(f"[[ $({cmd} | wc -l) -ne 0 ]]") - out = one.succeed(cmd) - assert '"values":[123]' in out - assert '"values":[1.23]' in out - ''; - } -) diff --git a/nixos/tests/victoriametrics/default.nix b/nixos/tests/victoriametrics/default.nix new file mode 100644 index 00000000000000..a86d9ee60a9e61 --- /dev/null +++ b/nixos/tests/victoriametrics/default.nix @@ -0,0 +1,10 @@ +{ + system ? builtins.currentSystem, + config ? { }, + pkgs ? import ../../.. { inherit system config; }, +}: + +{ + remote-write = import ./remote-write.nix { inherit system pkgs; }; + vmalert = import ./vmalert.nix { inherit system pkgs; }; +} diff --git a/nixos/tests/victoriametrics/remote-write.nix b/nixos/tests/victoriametrics/remote-write.nix new file mode 100644 index 00000000000000..54a4cfc7a4657b --- /dev/null +++ b/nixos/tests/victoriametrics/remote-write.nix @@ -0,0 +1,103 @@ +# Primarily reference the implementation of +import ../make-test-python.nix ( + { + lib, + pkgs, + ... + }: + let + username = "vmtest"; + password = "fsddfy8233rb"; # random string + passwordFile = pkgs.writeText "password-file" password; + in + { + name = "victoriametrics-remote-write"; + meta = with pkgs.lib.maintainers; { + maintainers = [ + yorickvp + ryan4yin + ]; + }; + + nodes = { + victoriametrics = + { + config, + pkgs, + ... 
+ }: + { + environment.systemPackages = [ pkgs.jq ]; + networking.firewall.allowedTCPPorts = [ 8428 ]; + services.victoriametrics = { + enable = true; + extraOptions = [ + "-httpAuth.username=${username}" + "-httpAuth.password=file://${toString passwordFile}" + ]; + }; + }; + + vmagent = + { + config, + pkgs, + ... + }: + { + environment.systemPackages = [ pkgs.jq ]; + services.vmagent = { + enable = true; + remoteWrite = { + url = "http://victoriametrics:8428/api/v1/write"; + basicAuthUsername = username; + basicAuthPasswordFile = passwordFile; + }; + + prometheusConfig = { + global = { + scrape_interval = "2s"; + }; + scrape_configs = [ + { + job_name = "node"; + static_configs = [ + { + targets = [ + "node:${toString config.services.prometheus.exporters.node.port}" + ]; + } + ]; + } + ]; + }; + }; + }; + + node = + { ... }: + { + services.prometheus.exporters.node = { + enable = true; + openFirewall = true; + }; + }; + }; + + testScript = '' + node.wait_for_unit("prometheus-node-exporter") + node.wait_for_open_port(9100) + + victoriametrics.wait_for_unit("victoriametrics") + victoriametrics.wait_for_open_port(8428) + + vmagent.wait_for_unit("vmagent") + + # check remote write + victoriametrics.wait_until_succeeds( + "curl --user '${username}:${password}' -sf 'http://localhost:8428/api/v1/query?query=node_exporter_build_info\{instance=\"node:9100\"\}' | " + + "jq '.data.result[0].value[1]' | grep '\"1\"'" + ) + ''; + } +) diff --git a/nixos/tests/victoriametrics/vmalert.nix b/nixos/tests/victoriametrics/vmalert.nix new file mode 100644 index 00000000000000..f92c35a8201067 --- /dev/null +++ b/nixos/tests/victoriametrics/vmalert.nix @@ -0,0 +1,179 @@ +# Primarily reference the implementation of +import ../make-test-python.nix ( + { + lib, + pkgs, + ... + }: + { + name = "victoriametrics-vmalert"; + meta = with pkgs.lib.maintainers; { + maintainers = [ + yorickvp + ryan4yin + ]; + }; + + nodes = { + victoriametrics = + { + config, + pkgs, + ... 
+ }: + { + environment.systemPackages = [ pkgs.jq ]; + networking.firewall.allowedTCPPorts = [ 8428 ]; + services.victoriametrics = { + enable = true; + prometheusConfig = { + global = { + scrape_interval = "2s"; + }; + scrape_configs = [ + { + job_name = "alertmanager"; + static_configs = [ + { + targets = [ + "alertmanager:${toString config.services.prometheus.alertmanager.port}" + ]; + } + ]; + } + { + job_name = "node"; + static_configs = [ + { + targets = [ + "node:${toString config.services.prometheus.exporters.node.port}" + ]; + } + ]; + } + ]; + }; + }; + + services.vmalert = { + enable = true; + settings = { + "datasource.url" = "http://localhost:8428"; # victoriametrics' api + "notifier.url" = [ + "http://alertmanager:${toString config.services.prometheus.alertmanager.port}" + ]; # alertmanager's api + rule = [ + (pkgs.writeText "instance-down.yml" '' + groups: + - name: test + rules: + - alert: InstanceDown + expr: up == 0 + for: 5s + labels: + severity: page + annotations: + summary: "Instance {{ $labels.instance }} down" + '') + ]; + }; + }; + }; + + alertmanager = + { + config, + pkgs, + ... + }: + { + services.prometheus.alertmanager = { + enable = true; + openFirewall = true; + + configuration = { + global = { + resolve_timeout = "1m"; + }; + + route = { + # Root route node + receiver = "test"; + group_by = [ "..." ]; + continue = false; + group_wait = "1s"; + group_interval = "15s"; + repeat_interval = "24h"; + }; + + receivers = [ + { + name = "test"; + webhook_configs = [ + { + url = "http://logger:6725"; + send_resolved = true; + max_alerts = 0; + } + ]; + } + ]; + }; + }; + }; + + logger = + { + config, + pkgs, + ... 
+ }: + { + networking.firewall.allowedTCPPorts = [ 6725 ]; + + services.prometheus.alertmanagerWebhookLogger.enable = true; + }; + }; + + testScript = '' + alertmanager.wait_for_unit("alertmanager") + alertmanager.wait_for_open_port(9093) + alertmanager.wait_until_succeeds("curl -s http://127.0.0.1:9093/-/ready") + + logger.wait_for_unit("alertmanager-webhook-logger") + logger.wait_for_open_port(6725) + + victoriametrics.wait_for_unit("victoriametrics") + victoriametrics.wait_for_unit("vmalert") + victoriametrics.wait_for_open_port(8428) + + victoriametrics.wait_until_succeeds( + "curl -sf 'http://127.0.0.1:8428/api/v1/query?query=count(up\{job=\"alertmanager\"\}==1)' | " + + "jq '.data.result[0].value[1]' | grep '\"1\"'" + ) + + victoriametrics.wait_until_succeeds( + "curl -sf 'http://127.0.0.1:8428/api/v1/query?query=sum(alertmanager_build_info)%20by%20(version)' | " + + "jq '.data.result[0].metric.version' | grep '\"${pkgs.prometheus-alertmanager.version}\"'" + ) + + victoriametrics.wait_until_succeeds( + "curl -sf 'http://127.0.0.1:8428/api/v1/query?query=count(up\{job=\"node\"\}!=1)' | " + + "jq '.data.result[0].value[1]' | grep '\"1\"'" + ) + + victoriametrics.wait_until_succeeds( + "curl -sf 'http://127.0.0.1:8428/api/v1/query?query=alertmanager_notifications_total\{integration=\"webhook\"\}' | " + + "jq '.data.result[0].value[1]' | grep -v '\"0\"'" + ) + + logger.wait_until_succeeds( + "journalctl -o cat -u alertmanager-webhook-logger.service | grep '\"alertname\":\"InstanceDown\"'" + ) + + logger.log(logger.succeed("systemd-analyze security alertmanager-webhook-logger.service | grep -v '✓'")) + + alertmanager.log(alertmanager.succeed("systemd-analyze security alertmanager.service | grep -v '✓'")) + ''; + } +)