From daf1a99a486dfd2b109ab601f48a1ee3aded6d40 Mon Sep 17 00:00:00 2001 From: Tobias Stenzel Date: Thu, 28 Nov 2024 22:46:40 +0100 Subject: [PATCH] Bring back loghost, graylog, elasticsearch6 roles - frozen packages for graylog and elasticsearch6-oss taken from our 22.05/22.11 platform versions - services/roles taken from 22.05/22.11 platform versions - use current upstream Elasticsearch module which is still compatible with ES 6 PL-132166 --- nixos/platform/static.nix | 1 + nixos/roles/default.nix | 7 +- nixos/roles/elasticsearch.nix | 440 +++++++++++++++++++ nixos/roles/graylog.nix | 324 ++++++++++++++ nixos/roles/loghost.nix | 65 +++ nixos/services/default.nix | 1 + nixos/services/graylog/default.nix | 506 ++++++++++++++++++++++ nixos/services/graylog/log4j2.xml | 34 ++ nixos/services/graylog/unused-package.nix | 34 ++ pkgs/fc/agent/fc/manage/graylog.py | 249 +++++++++++ pkgs/fc/agent/setup.py | 1 + pkgs/graylog/plugins.nix | 266 ++++++++++++ pkgs/overlay.nix | 20 + tests/loghost.nix | 103 +++++ 14 files changed, 2047 insertions(+), 4 deletions(-) create mode 100644 nixos/roles/elasticsearch.nix create mode 100644 nixos/roles/graylog.nix create mode 100644 nixos/roles/loghost.nix create mode 100644 nixos/services/graylog/default.nix create mode 100644 nixos/services/graylog/log4j2.xml create mode 100644 nixos/services/graylog/unused-package.nix create mode 100644 pkgs/fc/agent/fc/manage/graylog.py create mode 100644 pkgs/graylog/plugins.nix create mode 100644 tests/loghost.nix diff --git a/nixos/platform/static.nix b/nixos/platform/static.nix index 04637df16..56dda3e6c 100644 --- a/nixos/platform/static.nix +++ b/nixos/platform/static.nix @@ -142,6 +142,7 @@ with lib; # removed by upstream, we want to keep it memcached = 177; redis = 181; + graylog = 243; solr = 309; # Same as elasticsearch diff --git a/nixos/roles/default.nix b/nixos/roles/default.nix index 19b261ad3..44f9ef974 100644 --- a/nixos/roles/default.nix +++ b/nixos/roles/default.nix @@ -14,13 +14,16 @@ in { ./ceph/osd.nix ./ceph/rgw.nix ./devhost + ./elasticsearch.nix ./external_net ./ferretdb.nix ./gitlab.nix + ./graylog.nix ./jitsi ./k3s ./lamp.nix ./loki.nix + ./loghost.nix ./mailout.nix ./mailserver.nix ./matomo.nix @@ -42,10 +45,6 @@ in { ./webproxy.nix # Removed - (mkRemovedOptionModule [ "flyingcircus" "roles" "graylog" "enable" ] "Last platform version that supported graylog/loghost was 22.05.") - (mkRemovedOptionModule [ "flyingcircus" "roles" "elasticsearch6" "enable" ] "Last platform version that supported elasticsearch6 was 22.11.") - (mkRemovedOptionModule [ "flyingcircus" "roles" "elasticsearch7" "enable" ] "Last platform version that supported elasticsearch7 was 22.11.") - (mkRemovedOptionModule [ "flyingcircus" "roles" "loghost" "enable" ] "Last platform version that supported graylog/loghost was 22.05.") (mkRemovedOptionModule [ "flyingcircus" "roles" "loghost-location" "enable" ] "Last platform version that supported graylog/loghost was 22.05.") (mkRemovedOptionModule [ "flyingcircus" "roles" "mysql" "rootPassword" ] "Change the root password via MySQL and modify secret files.") (mkRemovedOptionModule [ "flyingcircus" "roles" "statshostproxy" "enable" ] "Use flyingcircus.roles.statshost-location-proxy.enable instead.") diff --git a/nixos/roles/elasticsearch.nix b/nixos/roles/elasticsearch.nix new file mode 100644 index 000000000..1c6292663 --- /dev/null +++ b/nixos/roles/elasticsearch.nix @@ -0,0 +1,440 @@ +{ options, config, lib, pkgs, ... 
}: + +with builtins; + +let + cfg = config.flyingcircus.roles.elasticsearch; + opts = options.flyingcircus.roles.elasticsearch; + cfg_service = config.services.elasticsearch; + fclib = config.fclib; + localConfigDir = "/etc/local/elasticsearch"; + + optionDoc = name: let + opt = opts."${name}"; + in + lib.concatStringsSep "\n\n" [ + "**flyingcircus.roles.elasticsearch.${name}**" + (lib.removePrefix "\n" (lib.removeSuffix "\n" opt.description)) + ]; + + formatList = list: + "[ ${lib.concatMapStringsSep " " (n: ''"${n}"'') list} ]"; + + esVersion = + if config.flyingcircus.roles.elasticsearch6.enable + then "6" + else if config.flyingcircus.roles.elasticsearch7.enable + then "7" + else null; + + package = versionConfiguration.${esVersion}.package; + enabled = esVersion != null; + + # XXX: We cannot get the config file path in the Nix store from Nix config. + # so we have to use the location where the config is copied to when + # Elasticsearch is started. There, only the elasticsearch user can read the + # config file which is annoying. + # This should be changed in the upstream module to make it possible to find + # the config file via a NixOS option and override it, if needed. + configFile = "/srv/elasticsearch/config/elasticsearch.yml"; + + versionConfiguration = { + "6" = { + package = pkgs.elasticsearch6-oss; + }; + "7" = { + package = pkgs.elasticsearch7-oss; + }; + null = { + package = null; + }; + }; + + esServices = + (fclib.findServices "elasticsearch6-node") ++ + (fclib.findServices "elasticsearch7-node"); + + defaultEsNodes = + map + (service: head (lib.splitString "." service.address)) + esServices; + + masterQuorum = (length cfg.esNodes) / 2 + 1; + + thisNode = + if config.networking.domain != null + then "${config.networking.hostName}.${config.networking.domain}" + else "localhost"; + + defaultClusterName = config.networking.hostName; + + configFromLocalConfigDir = + fclib.configFromFile "${localConfigDir}/elasticsearch.yml" ""; + + currentMemory = fclib.currentMemory 1024; + + esHeap = fclib.min + [ (currentMemory * cfg.heapPercentage / 100) + (31 * 1024)]; + + esShowConfig = pkgs.writeScriptBin "elasticsearch-show-config" '' + sudo -u elasticsearch cat ${configFile} + ''; + +in +{ + + options = with lib; { + + flyingcircus.roles.elasticsearch = { + + # This is a placeholder role, it does not support containers itself. + supportsContainers = fclib.mkDisableContainerSupport; + + clusterName = mkOption { + type = types.str; + default = fclib.configFromFile "${localConfigDir}/clusterName" defaultClusterName; + defaultText = "value from ${localConfigDir}/clusterName or host name"; + description = '' + The cluster name ES will use. By default, the string from + `${localConfigDir}/clusterName is used. If the file doesn't + exist, the host name is used as fallback. Because of this, you + have to set the cluster name explicitly if you want to set up a + multi-node cluster. + ''; + }; + + heapPercentage = mkOption { + type = types.int; + default = 50; + description = '' + Percentage of memory to use for ES heap. Defaults to 50 % of + available RAM: *systemMemory * heapPercentage / 100* + ''; + }; + + esNodes = mkOption { + type = types.listOf types.str; + default = defaultEsNodes; + defaultText = "all ES nodes in the resource group"; + description = '' + Names of the nodes that join this cluster and are eligible as masters. + By default, all ES nodes in a resource group are part of this cluster + and master-eligible. 
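+ The default list is taken from the `elasticsearch6-node` and `elasticsearch7-node` services in this resource group.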
+ Note that all of them have to use the same clusterName which must be + set explicitly when you want to set up a multi-node cluster. + + If only one esNode is given here, the node will start in single-node + mode which means that it won't try to find other ES nodes before + initializing the cluster. + + Having both ES6 and ES7 nodes in a cluster is possible. This allows + rolling upgrades. Note that new nodes that are added to a cluster + have to use the newest version. + + ES7: Values must use the same format as nodeName (just the hostname + by default) or cluster initialization will fail. + ''; + }; + + initialMasterNodes = mkOption { + type = types.listOf types.str; + default = []; + description = '' + *(ES7 only, has no effect for ES6)* + + Name of the nodes that should take a part in the initial master election. + WARNING: This should only be set when initializing a cluster with multiple nodes + from scratch and removed after the cluster has formed! + By default, this is empty which means that the node will join an existing + cluster or run in single-node mode when esNodes has only one entry. + You can set this to `config.flyingcircus.roles.elasticsearch.esNodes` to include + all automatically discovered nodes. + ''; + }; + + nodeName = mkOption { + type = types.nullOr types.string; + default = config.networking.hostName; + description = '' + The name for this node. Defaults to the hostname. + ''; + }; + extraConfig = mkOption { + type = types.lines; + default = ""; + description = '' + Additional YAML lines which are appended to the main `elasticsearch.yml` config file. + ''; + }; + }; + + flyingcircus.roles.elasticsearch6 = { + enable = mkEnableOption "Enable the Flying Circus elasticsearch6 role."; + supportsContainers = fclib.mkEnableContainerSupport; + }; + + flyingcircus.roles.elasticsearch7 = { + enable = mkEnableOption "Enable the Flying Circus elasticsearch7 role."; + supportsContainers = fclib.mkEnableContainerSupport; + }; + }; + + config = lib.mkMerge [ + + (lib.mkIf enabled { + + environment.systemPackages = [ + esShowConfig + ]; + + flyingcircus.roles.elasticsearch.extraConfig = configFromLocalConfigDir; + + services.elasticsearch = { + enable = true; + package = package; + listenAddress = thisNode; + dataDir = "/srv/elasticsearch"; + cluster_name = cfg.clusterName; + extraJavaOptions = [ + "-Des.path.scripts=${cfg_service.dataDir}/scripts" + "-Des.security.manager.enabled=false" + # Xms and Xmx are already defined as cmdline args by config/jvm.options. + # Appending the next two lines overrides the former. + "-Xms${toString esHeap}m" + "-Xmx${toString esHeap}m" + "-Dlog4j2.formatMsgNoLookups=true" + # Use new ES7 style for the publish address to avoid the annoying warning in ES6/7. 
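+ # The property name differs between the major versions, hence the version-conditional entries below.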
+ (lib.optionalString (esVersion == "6") "-Des.http.cname_in_publish_address=true") + (lib.optionalString (esVersion == "7") "-Des.transport.cname_in_publish_address=true") + ]; + + single_node = lib.length cfg.esNodes == 1; + + extraConf = '' + node.name: ${cfg.nodeName} + bootstrap.memory_lock: true + '' + (lib.optionalString (lib.versionOlder esVersion "7") '' + discovery.zen.minimum_master_nodes: ${toString masterQuorum} + discovery.zen.ping.unicast.hosts: ${toJSON cfg.esNodes} + '') + (lib.optionalString (lib.versionAtLeast esVersion "7") '' + discovery.seed_hosts: ${toJSON cfg.esNodes} + '') + (lib.optionalString (lib.versionAtLeast esVersion "7" && cfg.initialMasterNodes != []) '' + cluster.initial_master_nodes: ${toJSON cfg.initialMasterNodes} + '') + (lib.optionalString (cfg.extraConfig != "") '' + # flyingcircus.roles.elasticsearch.extraConfig + '' + cfg.extraConfig); + }; + + # Allow sudo-srv and service users to run commands as elasticsearch. + # There are various elasticsearch utility tools that have to be run as + # elasticsearch user. + flyingcircus.passwordlessSudoRules = [ + { + commands = [ "ALL" ]; + groups = [ "sudo-srv" "service" "elasticsearch" ]; + runAs = "elasticsearch"; + } + ]; + + flyingcircus.services.sensu-client = { + expectedDiskCapacity = { + # same as https://www.elastic.co/guide/en/elasticsearch/reference/7.17/modules-cluster.html#disk-based-shard-allocation + warning = 85; + critical = 90; + }; + }; + + systemd.services.elasticsearch = { + startLimitIntervalSec = 480; + startLimitBurst = 3; + serviceConfig = { + LimitMEMLOCK = "infinity"; + Restart = "always"; + }; + preStart = lib.mkAfter '' + # Install scripts + mkdir -p ${cfg_service.dataDir}/scripts + ''; + }; + + flyingcircus.activationScripts.elasticsearch = '' + install -d -o ${toString config.ids.uids.elasticsearch} -g service -m 02775 \ + ${localConfigDir} + ''; + + environment.etc."local/elasticsearch/elasticsearch.nix.example".text = '' + { config, pkgs, lib, ...}: + { + flyingcircus.roles.elasticsearch = { + # clusterName = "mycluster"; + # heapPercentage = 50; + # Only for initialization of new multi-node clusters! + # initialMasterNodes = config.flyingcircus.roles.elasticsearch.esNodes; + # extraConfig = ''' + # # some YAML + # '''; + }; + } + ''; + + environment.etc."local/elasticsearch/README.md".text = '' + Elasticsearch version ${esVersion}.x is running on this VM, with node + name `${cfg.nodeName}`. It is forming the cluster named + `${cfg.clusterName}` (${if cfg_service.single_node then "single-node" else "multi-node"}). + + The following nodes are eligible to be elected as master nodes: + `${formatList cfg.esNodes}` + + ${lib.optionalString (cfg.initialMasterNodes != []) '' + The node is running in multi-node bootstrap mode, `initialMasterNodes` is set to: + `${formatList cfg.initialMasterNodes}` + + WARNING: the `initialMasterNodes` setting should be removed after the cluster has formed! + ''} + + ## Interaction + + The Elasticsearch API is listening on the SRV interface. You can access + the API of nodes in the same project via HTTP without authentication. + Some examples: + + Show active nodes: + + ``` + curl ${config.networking.hostName}:9200/_cat/nodes + ``` + + Show cluster health: + + ``` + curl ${config.networking.hostName}:9200/_cat/health + ``` + + Show indices: + + ``` + curl ${config.networking.hostName}:9200/_cat/indices + ``` + + ## Configuration + + The role works without additional config for single-node setups. 
+ By default, the cluster name is the host name of the machine. + + Custom config can be set via NixOS options and is required for multi-node + setups. Plain config in `${localConfigDir}` is still supported, too. + See `${localConfigDir}/elasticsearch/elasticsearch.nix.example` for an example. + Save the content to `/etc/local/nixos/elasticsearch.nix`, for example, to + include it in the system config. + + To see the final rendered config for Elasticsearch, use the + `elasticsearch-show-config` command as service or sudo-srv user. + + To activate config changes, run `sudo fc-manage --build`. + + ### NixOS Options + + ${optionDoc "clusterName"} + + ${optionDoc "heapPercentage"} + + ${optionDoc "esNodes"} + + ${optionDoc "initialMasterNodes"} + + ${optionDoc "extraConfig"} + + ## Legacy Custom Config + + You can add a file named `${localConfigDir}/clusterName`, with the + cluster name as its sole contents. + + To add additional configuration options, create a file + `${localConfigDir}/elasticsearch.yml`. Its contents will be appended to + the base configuration. + ''; + + flyingcircus.services.sensu-client.checks = { + + es_circuit_breakers = { + notification = "ES: Circuit Breakers active"; + command = '' + ${pkgs.sensu-plugins-elasticsearch}/bin/check-es-circuit-breakers.rb \ + -h ${thisNode} + ''; + interval = 300; + }; + + es_cluster_health = { + notification = "ES: Cluster Health"; + command = '' + ${pkgs.sensu-plugins-elasticsearch}/bin/check-es-cluster-health.rb \ + -h ${thisNode} + ''; + }; + + es_file_descriptor = { + notification = "ES: File descriptors in use"; + command = '' + ${pkgs.sensu-plugins-elasticsearch}/bin/check-es-file-descriptors.rb \ + -h ${thisNode} + ''; + interval = 300; + }; + + es_heap = { + notification = "ES: Heap too full"; + command = '' + ${pkgs.sensu-plugins-elasticsearch}/bin/check-es-heap.rb \ + -h ${thisNode} -w 80 -c 90 -P + ''; + interval = 300; + }; + + es_node_status = { + notification = "ES: Node status"; + command = '' + ${pkgs.sensu-plugins-elasticsearch}/bin/check-es-node-status.rb \ + -h ${thisNode} + ''; + }; + + es_shard_allocation_status = { + notification = "ES: Shard allocation status"; + command = '' + ${pkgs.sensu-plugins-elasticsearch}/bin/check-es-shard-allocation-status.rb \ + -s ${thisNode} + ''; + interval = 300; + }; + + }; + + systemd.services.prometheus-elasticsearch-exporter = { + description = "Prometheus exporter for elasticsearch metrics"; + wantedBy = [ "multi-user.target" ]; + path = [ pkgs.prometheus-elasticsearch-exporter ]; + script = '' + exec elasticsearch_exporter\ + --es.uri http://${thisNode}:9200 \ + --web.listen-address localhost:9108 + ''; + serviceConfig = { + User = "nobody"; + Restart = "always"; + PrivateTmp = true; + WorkingDirectory = /tmp; + ExecReload = "${pkgs.coreutils}/bin/kill -HUP $MAINPID"; + }; + }; + + flyingcircus.services.telegraf.inputs = { + prometheus = [{ + urls = [ "http://localhost:9108/metrics" ]; + }]; + }; + }) + + ]; +} diff --git a/nixos/roles/graylog.nix b/nixos/roles/graylog.nix new file mode 100644 index 000000000..8a7de8ac4 --- /dev/null +++ b/nixos/roles/graylog.nix @@ -0,0 +1,324 @@ +# NOTES: +# * Mongo cluster setup requires manual intervention. +# * Logstash lumberjack plugin doesn't exist for graylog 3.x. +# Use integrated beats support. + +{ config, options, lib, pkgs, ... 
}: + +with builtins; + +let + cfg = config.flyingcircus.roles.graylog; + fclib = config.fclib; + glAPIHAPort = 8002; + gelfTCPHAPort = 12201; + beatsTCPHAPort = 12301; + listenFQDN = "${config.networking.hostName}.${config.networking.domain}"; + slash = addr: if fclib.isIp4 addr then "/32" else "/128"; + syslogInputPort = config.flyingcircus.services.graylog.syslogInputPort; + gelfTCPGraylogPort = config.flyingcircus.services.graylog.gelfTCPGraylogPort; + beatsTCPGraylogPort = config.flyingcircus.services.graylog.beatsTCPGraylogPort; + glAPIPort = config.flyingcircus.services.graylog.apiPort; + replSetName = if cfg.cluster then "graylog" else ""; + + jsonConfig = (fromJSON + (fclib.configFromFile /etc/local/graylog/graylog.json "{}")); + + # First cluster node becomes master + clusterNodes = + if cfg.cluster then + lib.unique + (filter + (s: lib.any (serviceType: s.service == serviceType) cfg.serviceTypes) + config.flyingcircus.encServices) + # single-node "cluster" + else [ { address = "${config.networking.hostName}.fcio.net"; + ips = fclib.network.srv.dualstack.addresses; } ]; + + masterHostname = + (head + (lib.splitString + "." + (head clusterNodes).address)); +in +{ + + options = with lib; { + + flyingcircus.roles.graylog = { + + enable = mkEnableOption '' + Graylog (3.x) role. + + Note: there can be multiple graylogs per RG, unlike loghost. + ''; + supportsContainers = fclib.mkDisableContainerSupport; + + serviceTypes = mkOption { + type = types.listOf types.str; + default = [ "graylog-server" ]; + description = '' + Service types that should be considered when forming the cluster. + Supported: graylog-server, loghost-server and loghost-location-graylog + ''; + }; + + cluster = mkOption { + type = types.bool; + default = true; + description = "Build a GL cluster. Usually disabled by loghost role."; + }; + + publicFrontend = { + + enable = mkEnableOption "Configure Nginx for GL UI on FE at 80/443?"; + + ssl = mkOption { + type = types.bool; + default = true; + description = '' + Enable SSL via Let's Encrypt. + ''; + }; + + hostName = mkOption { + type = types.nullOr types.str; + default = "graylog.${config.flyingcircus.enc.parameters.resource_group}.fcio.net"; + description = "HTTP host name for the GL frontend."; + example = "graylog.example.com"; + }; + }; + }; + }; + + config = lib.mkMerge [ + + (lib.mkIf (cfg.enable && cfg.publicFrontend.enable) { + services.nginx.virtualHosts."${cfg.publicFrontend.hostName}" = { + enableACME = cfg.publicFrontend.ssl; + forceSSL = cfg.publicFrontend.ssl; + locations = { + "/" = { + proxyPass = "http://${listenFQDN}:${toString glAPIHAPort}"; + extraConfig = '' + proxy_set_header Remote-User ""; + proxy_set_header X-Graylog-Server-URL https://${cfg.publicFrontend.hostName}/; + ''; + }; + "/admin" = { + proxyPass = "http://${listenFQDN}:${toString glAPIHAPort}"; + extraConfig = '' + auth_basic "FCIO user"; + auth_basic_user_file "/etc/local/nginx/htpasswd_fcio_users"; + ''; + }; + }; + }; + }) + + (lib.mkIf (jsonConfig ? heapPercentage) { + flyingcircus.services.graylog.heapPercentage = jsonConfig.heapPercentage; + }) + + (lib.mkIf (jsonConfig ? publicFrontend) { + flyingcircus.roles.graylog.publicFrontend.enable = jsonConfig.publicFrontend; + }) + + (lib.mkIf (jsonConfig ? 
publicFrontendHostname) { + flyingcircus.roles.graylog.publicFrontend.hostName = jsonConfig.publicFrontendHostname; + }) + + (lib.mkIf cfg.enable { + + networking.firewall.allowedTCPPorts = [ 9002 ]; + + networking.firewall.extraCommands = '' + ip46tables -A nixos-fw -i ethsrv -p udp --dport ${toString syslogInputPort} -j nixos-fw-accept + ip46tables -A nixos-fw -i ethsrv -p tcp --dport ${toString beatsTCPHAPort} -j nixos-fw-accept + ''; + + flyingcircus.services.graylog = { + + enable = true; + isMaster = masterHostname == config.networking.hostName; + + mongodbUri = let + repl = if (length clusterNodes) > 1 then "?replicaSet=${replSetName}" else ""; + mongodbNodes = concatStringsSep "," + (map (node: "${fclib.quoteIPv6Address (head (filter fclib.isIp6 node.ips))}:27017") clusterNodes); + in + "mongodb://${mongodbNodes}/graylog${repl}"; + + config = jsonConfig.extraGraylogConfig or {}; + + }; + + flyingcircus.roles.mongodb40.enable = true; + + services.mongodb.replSetName = replSetName; + services.mongodb.extraConfig = '' + storage.wiredTiger.engineConfig.cacheSizeGB: 1 + ''; + + flyingcircus.services.nginx.enable = true; + + flyingcircus.localConfigDirs.graylog = { + dir = "/etc/local/graylog"; + user = "graylog"; + }; + + environment.etc."local/graylog/README.txt".text = '' + Graylog (${config.services.graylog.package.version}) is running on this machine. + + If you need to set non-default configuration options, you can put a + file called `graylog.json` into this directory. + Have a look at graylog.json.example in this directory. + + Available options: + + * publicFrontend: set to true to serve the Graylog dashboard on + the public interface via HTTPS. + * publicFrontendHostname: set hostname for Graylog dashboard, + default is ${options.flyingcircus.roles.graylog.publicFrontend.hostName.default}. + * heapPercentage (int): Fraction of system memory that is used for + Graylog, in percent. + * extraGraylogConfig (object): Addional config params supported by + Graylog's server config file. + See https://docs.graylog.org/en/3.0/pages/configuration/server.conf.html. + + ''; + + environment.etc."local/graylog/graylog.json.example".text = '' + { + "publicFrontend": true, + "heapPercentage": 70, + "extraGraylogConfig": { + "processbuffer_processors": 4, + "trusted_proxies": "127.0.0.1/32, 0:0:0:0:0:0:0:1/128" + } + } + ''; + + services.nginx.virtualHosts."${cfg.publicFrontend.hostName}:9002" = + let + mkListen = addr: { inherit addr; port = 9002; }; + in { + listen = map mkListen (fclib.network.srv.dualstack.addressesQuoted); + locations = { + "/" = { + proxyPass = "http://${listenFQDN}:${toString glAPIHAPort}"; + extraConfig = '' + # Direct access w/o prior authentication. This is useful for API access. + # Strip Remote-User as there is nothing in between the user and us. + proxy_set_header Remote-User ""; + proxy_set_header X-Graylog-Server-URL http://${listenFQDN}:9002/; + ''; + }; + }; + }; + # HAProxy load balancer. + # Since haproxy is rather lightweight we just fire up one on each graylog + # node, talking to all known graylog nodes. + flyingcircus.services.haproxy = let + # Journalbeat uses long-running connections and may send nothing + # for a while. Use ttl 120s for Journalbeat to make sure it + # reconnects before it's thrown out by HAproxy. 
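+ # The 121s client timeouts below stay just above that interval; GELF uses a short 10s timeout instead.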
+ beatsTimeout = "121s"; + graylogTimeout = "121s"; + gelfTimeout = "10s"; + mkBinds = port: + map + (addr: "${addr}:${toString port}") + fclib.network.srv.dualstack.addresses; + in { + enable = true; + enableStructuredConfig = true; + + frontend = { + gelf-tcp-in = { + binds = mkBinds gelfTCPHAPort; + mode = "tcp"; + options = [ "tcplog" ]; + timeout.client = gelfTimeout; + default_backend = "gelf_tcp"; + }; + + beats-tcp-in = { + binds = mkBinds beatsTCPHAPort; + mode = "tcp"; + options = [ "tcplog" ]; + timeout.client = beatsTimeout; + default_backend = "beats_tcp"; + }; + + graylog_http = { + binds = mkBinds glAPIHAPort; + options = [ "httplog" ]; + timeout.client = graylogTimeout; + default_backend = "graylog"; + }; + }; + + backend = { + gelf_tcp = { + mode = "tcp"; + options = [ "httpchk HEAD /api/system/lbstatus" ]; + timeout.server = gelfTimeout; + timeout.tunnel = "61s"; + servers = map + ( node: + "${node.address} ${head (filter fclib.isIp6 node.ips)}:${toString gelfTCPGraylogPort}" + + " check port ${toString glAPIPort} inter 10s rise 2 fall 1" + ) + clusterNodes; + balance = "leastconn"; + }; + + beats_tcp = { + mode = "tcp"; + options = [ "httpchk HEAD /api/system/lbstatus" ]; + timeout.server = beatsTimeout; + servers = map + ( node: + "${node.address} ${head (filter fclib.isIp6 node.ips)}:${toString beatsTCPGraylogPort}" + + " check port ${toString glAPIPort} inter 10s rise 2 fall 1" + ) + clusterNodes; + balance = "leastconn"; + }; + + graylog = { + options = [ "httpchk GET /" ]; + timeout.server = graylogTimeout; + servers = map + ( node: + "${node.address} ${head (filter fclib.isIp6 node.ips)}:${toString glAPIPort}" + + " check fall 1 rise 2 inter 10s maxconn 500" + ) + clusterNodes; + balance = "roundrobin"; + }; + + stats = { + extraConfig = '' + stats uri / + stats refresh 5s + ''; + }; + }; + }; + }) + + { + flyingcircus.roles.statshost.prometheusMetricRelabel = [ + { + source_labels = [ "__name__" ]; + regex = "(org_graylog2)_(.*)$"; + replacement = "graylog_\${2}"; + target_label = "__name__"; + } + ]; + } + + ]; +} diff --git a/nixos/roles/loghost.nix b/nixos/roles/loghost.nix new file mode 100644 index 000000000..387a99eff --- /dev/null +++ b/nixos/roles/loghost.nix @@ -0,0 +1,65 @@ +{ config, lib, pkgs, ... }: + +with builtins; + +let + cfg = config.flyingcircus.roles.loghost; + fclib = config.fclib; + + # It's common to have stathost and loghost on the same node. Each should + # use half of the memory then. A general approach for this kind of + # multi-service would be nice. + heapCorrection = + if config.flyingcircus.roles.statshost-master.enable + then 50 + else 100; +in +{ + + options = { + + flyingcircus.roles.loghost = { + enable = lib.mkEnableOption '' + Flying Circus Loghost role. + This role enables the full graylog stack at once (GL, ES, Mongo). + ''; + supportsContainers = fclib.mkDisableContainerSupport; + }; + }; + + config = lib.mkIf (cfg.enable) { + + flyingcircus.roles.graylog = { + enable = true; + cluster = false; + serviceTypes = [ "loghost-server" ]; + }; + + flyingcircus.services.graylog = { + heapPercentage = fclib.mkPlatform (15 * heapCorrection / 100); + elasticsearchHosts = [ + "http://${config.networking.hostName}:9200" + ]; + }; + + # Graylog 3.x wants Elasticsearch 6, ES7 does not work (yet). 
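+ # A dedicated single-node ES6 cluster named "graylog" is configured below.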
+ flyingcircus.roles.elasticsearch6.enable = true; + flyingcircus.roles.elasticsearch = { + clusterName = "graylog"; + esNodes = [ config.networking.hostName ]; + heapPercentage = fclib.mkPlatform (35 * heapCorrection / 100); + # Disable automatic index creation which can mess up the + # index structure expected by Graylog and prevent index rotation. + # Graylog writes data to an alias called graylog_deflector which has + # to be created before writing to it. We didn't have this setting in the + # past and saw that graylog_deflector was sometimes + # automatically created as an index by ES. + # Recommended by Graylog docs (https://archivedocs.graylog.org/en/3.3/pages/installation/os/centos.html). + extraConfig = '' + action.auto_create_index: false + ''; + }; + + }; + +} diff --git a/nixos/services/default.nix b/nixos/services/default.nix index 51b1cb3c3..f4c4c2ae6 100755 --- a/nixos/services/default.nix +++ b/nixos/services/default.nix @@ -20,6 +20,7 @@ in { ./ceph/server.nix ./consul.nix ./ferretdb.nix + ./graylog ./haproxy ./jitsi/jibri.nix ./jitsi/jicofo.nix diff --git a/nixos/services/graylog/default.nix b/nixos/services/graylog/default.nix new file mode 100644 index 000000000..e8c2fc2ce --- /dev/null +++ b/nixos/services/graylog/default.nix @@ -0,0 +1,506 @@ +{ config, lib, pkgs, ... }: + +with builtins; + +let + cfg = config.flyingcircus.services.graylog; + fclib = config.fclib; + + listenFQDN = "${config.networking.hostName}.${config.networking.domain}"; + # graylog listens on first srv ipv6 address + listenIP = (head config.networking.interfaces.ethsrv.ipv6.addresses).address; + # FQDN doesn't work here + httpBindAddress = "[${listenIP}]:${toString cfg.apiPort}"; + webListenUri = "http://${listenFQDN}:${toString cfg.apiPort}"; + restListenUri = "${webListenUri}/api"; + + glNodes = + fclib.listServiceAddresses "loghost-server" ++ + fclib.listServiceAddresses "graylog-server"; + + glPlugins = pkgs.buildEnv { + name = "graylog-plugins"; + paths = cfg.plugins; + }; + + # Secrets can be set in advance (for example, to share a password across nodes). + # Missing files will be generated when the graylog service starts. + + # This password can be used to login with the admin user. 
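+ # password_secret, in contrast, is only used internally by Graylog (for encrypting and salting stored credentials) and has to be identical on all nodes of a cluster.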
+ rootPasswordFile = "/etc/local/graylog/password"; + passwordSecretFile = "/etc/local/graylog/password_secret"; + + rootPassword = fclib.servicePassword { + user = cfg.user; + file = rootPasswordFile; + token = config.networking.hostName; + }; + + passwordSecret = fclib.servicePassword { + user = cfg.user; + file = passwordSecretFile; + token = config.networking.hostName; + }; + + graylogShowConfig = pkgs.writeScriptBin "graylog-show-config" '' + cat /run/graylog/graylog.conf + ''; + + defaultGraylogConfig = let + slash = addr: if fclib.isIp4 addr then "/32" else "/128"; + otherGraylogNodes = + filter + (a: elem "${a.name}.${config.networking.domain}" glNodes) + config.flyingcircus.encAddresses; + + in { + http_bind_address = httpBindAddress; + http_publish_uri = webListenUri; + timezone = config.time.timeZone; + + processbuffer_processors = + fclib.max [ + ((fclib.currentCores 1) - 2) + 5 + ]; + + outputbuffer_processors = + fclib.max [ + ((fclib.currentCores 1) / 2) + 3 + ]; + } // + lib.optionalAttrs (otherGraylogNodes != []) { + trusted_proxies = + concatMapStringsSep + ", " + (a: (fclib.stripNetmask a.ip) + (slash a.ip)) + otherGraylogNodes; + }; + + graylogConf = let + mkLine = name: value: "${name} = ${toString value}"; + in '' + is_master = ${lib.boolToString cfg.isMaster} + node_id_file = ${cfg.nodeIdFile} + elasticsearch_hosts = ${lib.concatStringsSep "," cfg.elasticsearchHosts} + message_journal_dir = ${cfg.messageJournalDir} + mongodb_uri = ${cfg.mongodbUri} + plugin_dir = /var/lib/graylog/plugins + + # secrets + root_password_sha2 = $(sha256sum ${rootPassword.file} | cut -f1 -d " ") + password_secret = $(cat "${passwordSecret.file}") + + # disable version check as its really annoying and obscures real errors + versionchecks = false + + # Settings here can be overridden by flyingcircus.services.graylog.config. + '' + lib.concatStringsSep + "\n" + (lib.mapAttrsToList + mkLine + (defaultGraylogConfig // cfg.config)); + + graylogConfPath = "/run/graylog/graylog.conf"; + + telegrafUsername = "telegraf-${config.networking.hostName}"; + telegrafPassword = fclib.derivePasswordForHost "graylog-telegraf"; + +in { + + options = with lib; { + + flyingcircus.services.graylog = { + + enable = mkEnableOption "Preconfigured Graylog (3.x)."; + + package = mkOption { + type = types.package; + default = pkgs.graylog-3_3; + defaultText = "pkgs.graylog-3_3"; + description = "Graylog package to use. Only works with Graylog 3.3"; + }; + + user = mkOption { + type = types.str; + default = "graylog"; + example = literalExample "graylog"; + description = "User account under which graylog runs"; + }; + + isMaster = mkOption { + type = types.bool; + default = true; + description = "Use this graylog node as master. Only one master per cluster is allowed."; + }; + + nodeIdFile = mkOption { + type = types.str; + default = "/var/lib/graylog/server/node-id"; + description = "Path of the file containing the graylog node-id"; + }; + + elasticsearchHosts = mkOption { + type = types.listOf types.str; + example = literalExample ''[ "http://node1:9200" "http://user:password@node2:19200" ]''; + description = "List of valid URIs of the http ports of your elastic nodes. If one or more of your elasticsearch hosts require authentication, include the credentials in each node URI that requires authentication"; + }; + + messageJournalDir = mkOption { + type = types.str; + default = "/var/lib/graylog/data/journal"; + description = "The directory which will be used to store the message journal. 
The directory must be exclusively used by Graylog and must not contain any other files than the ones created by Graylog itself"; + }; + + mongodbUri = mkOption { + type = types.str; + default = "mongodb://localhost/graylog"; + description = "MongoDB connection string. See http://docs.mongodb.org/manual/reference/connection-string/ for details"; + }; + + plugins = mkOption { + description = "Extra graylog plugins"; + default = with pkgs.graylogPlugins; [ slack ]; + type = types.listOf types.package; + }; + + heapPercentage = mkOption { + type = types.int; + default = 70; + description = "How much RAM should go to graylog heap."; + }; + + beatsTCPGraylogPort = mkOption { + type = types.int; + default = 12302; + }; + + gelfTCPGraylogPort = mkOption { + type = types.int; + default = 12202; + }; + + apiPort = mkOption { + type = types.int; + default = 9001; + }; + + syslogInputPort = mkOption { + type = types.int; + default = 5140; + description = "UDP Port for the Graylog syslog input."; + }; + + config = mkOption { + type = types.attrs; + default = {}; + description = '' + Additional config params for the Graylog server config file. + They override default settings defined by this service with the same name. + ''; + }; + }; + }; + + + config = lib.mkIf cfg.enable { + + users.users = lib.mkIf (cfg.user == "graylog") { + graylog = { + isSystemUser = true; + uid = config.ids.uids.graylog; + description = "Graylog server daemon user"; + group = "graylog"; + }; + }; + + users.groups = lib.mkIf (cfg.user == "graylog") { + graylog = {}; + }; + + systemd.tmpfiles.rules = [ + "d '${cfg.messageJournalDir}' - ${cfg.user} - - -" + "d '/run/graylog' - ${cfg.user} - - -" + # Purge geolite DB that has been created by a timer in earlier releases. + "r /var/lib/graylog/GeoLite2-City.mmdb" + ]; + + environment.etc."local/graylog/api_url".text = restListenUri; + + environment.systemPackages = [ graylogShowConfig ]; + + systemd.services.graylog = { + + description = "Graylog Server"; + wantedBy = [ "multi-user.target" ]; + environment = let + pkg = config.services.graylog.package; + javaHeap = ''${toString + (fclib.max [ + ((fclib.currentMemory 1024) * cfg.heapPercentage / 100) + 768 + ])}m''; + + javaOpts = [ + "-Djava.library.path=${pkg}/lib/sigar" + "-Dlog4j.configurationFile=file://${./log4j2.xml}" + "-Xms${javaHeap}" + "-Xmx${javaHeap}" + "-XX:NewRatio=1" + "-server" + "-XX:+ResizeTLAB" + "-XX:+UseConcMarkSweepGC" + "-XX:+CMSConcurrentMTEnabled" + "-XX:+CMSClassUnloadingEnabled" + "-XX:+UseParNewGC" + "-XX:-OmitStackTraceInFastThrow" + ]; + + in { + JAVA_HOME = pkgs.jdk8_headless; + GRAYLOG_CONF = graylogConfPath; + JAVA_OPTS = lib.concatStringsSep " " javaOpts; + }; + + path = [ pkgs.jdk8_headless pkgs.which pkgs.procps ]; + + preStart = '' + rm -rf /var/lib/graylog/plugins || true + mkdir -p /var/lib/graylog/plugins -m 755 + + mkdir -p "$(dirname ${cfg.nodeIdFile})" + chown -R ${cfg.user} "$(dirname ${cfg.nodeIdFile})" + + for declarativeplugin in `ls ${glPlugins}/bin/`; do + ln -sf ${glPlugins}/bin/$declarativeplugin /var/lib/graylog/plugins/$declarativeplugin + done + for includedplugin in `ls ${cfg.package}/plugin/`; do + ln -s ${cfg.package}/plugin/$includedplugin /var/lib/graylog/plugins/$includedplugin || true + done + + # Generate secrets if missing and write config file + + ${rootPassword.generate} + ${passwordSecret.generate} + + cat > ${graylogConfPath} << EOF + ${graylogConf} + EOF + + chown ${cfg.user}:service ${graylogConfPath} + chmod 440 ${graylogConfPath} + ''; + + postStart = '' + # Wait 
until GL is available for use + for count in {0..10000}; do + ${pkgs.curl}/bin/curl -m 2 -s ${webListenUri} && exit + echo "Trying to connect to ${webListenUri} for ''${count}s" + sleep 1 + done + exit 1 + ''; + + serviceConfig = { + Restart = "always"; + # Starting just takes a long time... + TimeoutStartSec = 360; + PermissionsStartOnly = true; + User = "${cfg.user}"; + StateDirectory = "graylog"; + ExecStart = "${cfg.package}/bin/graylogctl run"; + }; + + }; + + systemd.services.fc-graylog-config = { + description = "Configure Graylog FCIO settings"; + requires = [ "graylog.service" ]; + after = [ "graylog.service" "mongodb.service" "elasticsearch.service" ]; + wantedBy = [ "multi-user.target" ]; + serviceConfig = { + Type = "oneshot"; + User = config.services.graylog.user; + RemainAfterExit = true; + }; + script = let + + syslogUdpConfiguration = { + configuration = { + bind_address = "0.0.0.0"; + port = cfg.syslogInputPort; + }; + title = "Syslog UDP"; # be careful changing it, it's used as + # a primary key for identifying the config + # object + type = "org.graylog2.inputs.syslog.udp.SyslogUDPInput"; + global = true; + }; + + gelfTcpConfiguration = { + configuration = { + bind_address = "0.0.0.0"; + port = cfg.gelfTCPGraylogPort; + }; + title = "GELF TCP"; + type = "org.graylog2.inputs.gelf.tcp.GELFTCPInput"; + global = true; + }; + + beatsTcpConfiguration = { + configuration = { + bind_address = "0.0.0.0"; + no_beats_prefix = true; + port = cfg.beatsTCPGraylogPort; + }; + title = "Beats TCP"; + type = "org.graylog.plugins.beats.Beats2Input"; + global = true; + }; + + geodbConfiguration = { + enabled = true; + db_type = "MAXMIND_CITY"; + db_path = "/var/lib/graylog/GeoLite2-City.mmdb"; + }; + + ldapConfiguration = { + enabled = true; + system_username = fclib.getLdapNodeDN; + system_password = fclib.getLdapNodePassword; + ldap_uri = "ldaps://ldap.fcio.net:636/"; + trust_all_certificates = true; + use_start_tls = false; + active_directory = false; + search_base = "ou=People,dc=gocept,dc=com"; + search_pattern = "(&(&(objectClass=inetOrgPerson)(uid={0}))(memberOf=cn=${config.flyingcircus.enc.parameters.resource_group},ou=GroupOfNames,dc=gocept,dc=com))"; + display_name_attribute = "displayName"; + default_group = "Admin"; + }; + + metricsRole = { + description = "Provides read access to all system metrics"; + permissions = ["metrics:*"]; + read_only = false; + }; + + telegrafUser = { + password = telegrafPassword; + roles = [ "Metrics" ]; + }; + + callApi = what: "${pkgs.fc.agent}/bin/fc-graylog ${what}"; + + configureInput = input: + callApi "configure --input '${toJSON input}'"; + in '' + ${configureInput syslogUdpConfiguration} + ${configureInput gelfTcpConfiguration} + ${configureInput beatsTcpConfiguration} + + ${callApi '' + call \ + -s 202 \ + /system/cluster_config/org.graylog.plugins.map.config.GeoIpResolverConfig \ + '${toJSON geodbConfiguration}' + ''} + + ${callApi '' + call \ + -s 204 \ + /system/ldap/settings \ + '${toJSON ldapConfiguration}' + ''} + + ${callApi "ensure-role Metrics '${toJSON metricsRole}'"} + + ${callApi "ensure-user ${telegrafUsername} '${toJSON telegrafUser}'"} + ''; + }; + + systemd.services.graylog-collect-journal-age-metric = rec { + description = "Collect journal age and report to Telegraf"; + wantedBy = [ "graylog.service" "telegraf.service" "fc-graylog-config.service" ]; + after = wantedBy; + serviceConfig = { + User = "telegraf"; + Restart = "always"; + RestartSec = "10"; + ExecStart = '' + ${pkgs.fc.agent}/bin/fc-graylog \ + -u 
${telegrafUsername} \ + -p '${telegrafPassword}' \ + collect-journal-age-metric --socket-path /run/telegraf/influx.sock + + ''; + }; + }; + + services.collectd.extraConfig = '' + LoadPlugin curl_json + + + User "admin" + Password "${rootPassword.value}" + Header "Accept: application/json" + Instance "graylog" + + Type "gauge" + + + Type "gauge" + + + Type "gauge" + + + + User "admin" + Password "${rootPassword.value}" + Header "Accept: application/json" + Instance "graylog" + + Type "gauge" + + + + ''; + + flyingcircus.services.sensu-client.checks = { + + graylog_ui = { + notification = "Graylog UI alive"; + command = '' + ${pkgs.monitoring-plugins}/bin/check_http \ + -H ${listenFQDN} -p ${toString cfg.apiPort} \ + -u / + ''; + }; + + }; + + flyingcircus.services.telegraf.inputs.graylog = [ + { + servers = [ "${restListenUri}/system/metrics/multiple" ]; + metrics = [ "jvm.memory.total.committed" + "jvm.memory.total.used" + "jvm.threads.count" + "org.graylog2.buffers.input.size" + "org.graylog2.buffers.input.usage" + "org.graylog2.buffers.output.size" + "org.graylog2.buffers.output.usage" + "org.graylog2.buffers.process.size" + "org.graylog2.buffers.process.usage" + "org.graylog2.journal.oldest-segment" + "org.graylog2.journal.size" + "org.graylog2.journal.size-limit" + "org.graylog2.throughput.input" + "org.graylog2.throughput.output" ]; + username = telegrafUsername; + password = telegrafPassword; + } + ]; + }; + +} diff --git a/nixos/services/graylog/log4j2.xml b/nixos/services/graylog/log4j2.xml new file mode 100644 index 000000000..131ad01ff --- /dev/null +++ b/nixos/services/graylog/log4j2.xml @@ -0,0 +1,34 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/nixos/services/graylog/unused-package.nix b/nixos/services/graylog/unused-package.nix new file mode 100644 index 000000000..5db9e8f00 --- /dev/null +++ b/nixos/services/graylog/unused-package.nix @@ -0,0 +1,34 @@ +{ lib, stdenv, fetchurl, makeWrapper, openjdk11_headless, nixosTests }: + +stdenv.mkDerivation rec { + pname = "graylog"; + version = "3.3.16"; + + src = fetchurl { + url = "https://packages.graylog2.org/releases/graylog/graylog-${version}.tgz"; + sha256 = "sha256-P/cnfYKnMSnDD4otEyirKlLaFduyfSO9sao4BY3c3Z4="; + }; + + dontBuild = true; + dontStrip = true; + + nativeBuildInputs = [ makeWrapper ]; + makeWrapperArgs = [ "--set-default" "JAVA_HOME" "${openjdk11_headless}" ]; + + passthru.tests = { inherit (nixosTests) graylog; }; + + installPhase = '' + mkdir -p $out + cp -r {graylog.jar,lib,bin,plugin} $out + wrapProgram $out/bin/graylogctl $makeWrapperArgs + ''; + + meta = with lib; { + description = "Open source log management solution"; + homepage = "https://www.graylog.org/"; + license = licenses.gpl3; + maintainers = [ maintainers.fadenb ]; + mainProgram = "graylogctl"; + platforms = platforms.unix; + }; +} diff --git a/pkgs/fc/agent/fc/manage/graylog.py b/pkgs/fc/agent/fc/manage/graylog.py new file mode 100644 index 000000000..3b3029cf5 --- /dev/null +++ b/pkgs/fc/agent/fc/manage/graylog.py @@ -0,0 +1,249 @@ +#! /usr/bin/env nix-shell +#! 
nix-shell -i python3 -p python37Packages.python -p python37Packages.requests -p python37Packages.dateutil -p python37Packages.click +# +# input sample +# { +# "configuration": { +# "tls_key_password": "P@ssw0rd", +# "recv_buffer_size": 1048576, +# "max_message_size": 2097152, +# "bind_address": "0.0.0.0", +# "port": 12201, +# "tls_enable": false, +# "use_null_delimiter": true +# }, +# "title": "myNewGlobalGelfTcpInput", +# "global": true, +# "type": "org.graylog2.inputs.gelf.tcp.GELFTCPInput" +# } +# 201 -> success +# returns input id +# +# requests.post(api + '/system/inputs/', auth=(user, pw), json=data).text +# >>> '{"id":"57fe09c2ec3fa136a780adb9"}' +import json +import logging +import os.path +import socket +import time + +import click +import dateutil.parser +import requests + +logging.basicConfig(format="%(levelname)s:%(message)s", level=logging.INFO) + +log = logging.getLogger("fc-graylog") + + +@click.group() +@click.option("-u", "--user", default="admin", show_default=True) +@click.option("-p", "--password") +@click.option( + "-P", + "--password-file", + default="/etc/local/graylog/password", + show_default=True, +) +@click.option("--api", "-a") +@click.option( + "-A", "--api-file", default="/etc/local/graylog/api_url", show_default=True +) +@click.pass_context +def main(ctx, api, user, password, password_file, api_file): + graylog = requests.Session() + + if not password: + with open(password_file) as f: + password = f.read() + + if not api: + with open(api_file) as f: + api = f.read() + + graylog.auth = (user, password) + graylog.headers = {"X-Requested-By": "cli", "Accept": "application/json"} + graylog.api = api + ctx.obj = graylog + + +@click.command() +@click.option("--input") +@click.option("--raw-path") +@click.option("--raw-data") +@click.pass_obj +def configure(graylog, input, raw_path, raw_data): + """Configure a Graylog input node.""" + api = graylog.api + + if input: + # check if there is input with this name currently configured, + # if so return + data = json.loads(input) + log.info("Checking intput: %s", data["title"]) + response = graylog.get(api + "/system/cluster/node") + response.raise_for_status() + data["node"] = response.json()["node_id"] + response = graylog.get(api + "/system/inputs") + response.raise_for_status() + for _input in response.json()["inputs"]: + if _input["title"] == data["title"]: + log.info( + "Graylog input already configured. 
Updating: %s", + data["title"], + ) + response = graylog.put( + api + "/system/inputs/%s" % _input["id"], json=data + ) + response.raise_for_status() + break + else: + response = graylog.post(api + "/system/inputs", json=data) + response.raise_for_status() + log.info("Graylog input configured: %s", data["title"]) + + if raw_path and raw_data: + log.info("Update %s", raw_path) + data = json.loads(raw_data) + response = graylog.put(api + raw_path, json=data) + response.raise_for_status() + + +main.add_command(configure) + + +@click.command() +@click.option("--socket-path") +@click.pass_obj +def collect_journal_age_metric(graylog, socket_path): + """Sends journal age metrics to telegraf periodically.""" + s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) + s.connect(socket_path) + while True: + response = graylog.get( + graylog.api + "/system/metrics/org.graylog2.journal.oldest-segment" + ) + + response.raise_for_status() + segment_date = dateutil.parser.parse(response.json()["value"]) + response_date = dateutil.parser.parse(response.headers["date"]) + age = (response_date - segment_date).total_seconds() + s.send(("graylog_journal_age value=%f\n" % age).encode("us-ascii")) + time.sleep(10) + + +main.add_command(collect_journal_age_metric) + + +def handle_api_response(response, expected_status=[200], log_response=True): + if response.status_code in expected_status: + log.info( + "%s (%d) %s", + response.url, + response.status_code, + response.text if log_response else "", + ) + if response.text: + return response.json() + elif response.status_code == 400: + log.error("%s bad request: %s", response.url, response.text) + else: + log.warning( + "%s unexpected status: %d", response.url, response.status_code + ) + + response.raise_for_status() + + +@click.command() +@click.argument("name") +@click.argument("config") +@click.pass_obj +def ensure_user(graylog, name, config): + """Creates or updates a Graylog user.""" + users_url = f"{graylog.api}/users" + user_url = f"{users_url}/{name}" + resp = graylog.get(user_url) + data = { + "username": name, + "full_name": name, + "email": f"{name}@localhost", + "permissions": [], + **json.loads(config), + } + + handle_api_response(resp, [200, 404]) + + if resp.ok: + log.info("user %s exists, updating", name) + resp = graylog.put(user_url, json=data) + handle_api_response(resp, [204]) + if resp.status_code == 404: + log.info("creating user %s", name) + resp = graylog.post(users_url, json=data) + handle_api_response(resp, [201]) + + +main.add_command(ensure_user) + + +@click.command() +@click.argument("name") +@click.argument("config") +@click.pass_obj +def ensure_role(graylog, name, config): + """Creates or updates a Graylog role""" + roles_url = f"{graylog.api}/roles" + role_url = f"{roles_url}/{name}" + resp = graylog.get(role_url) + data = {"name": name, **json.loads(config)} + + handle_api_response(resp, [200, 404]) + + if resp.ok: + log.info("role %s exists, updating", name) + resp = graylog.put(role_url, json=data) + handle_api_response(resp) + if resp.status_code == 404: + log.info("creating role %s", name) + resp = graylog.post(roles_url, json=data) + handle_api_response(resp, [201]) + + +main.add_command(ensure_role) + + +@click.command() +@click.argument("path") +@click.argument("raw") +@click.option("--method", type=click.Choice(["POST", "PUT"]), default="PUT") +@click.option( + "--expected-status", "-s", multiple=True, default=[200], type=int +) +@click.pass_obj +def call(graylog, path, raw, method, expected_status): + """Runs an arbitrary API 
PUT/POST request.""" + data = json.loads(raw) + resp = graylog.request(method, graylog.api + path, json=data) + handle_api_response(resp, expected_status) + + +main.add_command(call) + + +@click.command() +@click.argument("path") +@click.option("--expected-status", "-s", multiple=True, default=[200]) +@click.option("--log-response/--no-log-response", "-l", default=False) +@click.pass_obj +def get(graylog, path, expected_status, log_response): + """Runs an arbitrary API GET request.""" + resp = graylog.get(graylog.api + path) + result = handle_api_response(resp, expected_status, log_response) + print(resp.text) + + +main.add_command(get) + +if __name__ == "__main__": + main() diff --git a/pkgs/fc/agent/setup.py b/pkgs/fc/agent/setup.py index 74c6c7d60..b5c50d53f 100644 --- a/pkgs/fc/agent/setup.py +++ b/pkgs/fc/agent/setup.py @@ -69,6 +69,7 @@ "fc-backy=fc.manage.backy:main", "fc-collect-garbage=fc.manage.collect_garbage:app", "fc-directory=fc.util.directory:directory_cli", + "fc-graylog=fc.manage.graylog:main", "fc-kubernetes=fc.manage.kubernetes:app", "fc-maintenance=fc.maintenance.cli:app", "fc-manage=fc.manage.cli:app", diff --git a/pkgs/graylog/plugins.nix b/pkgs/graylog/plugins.nix new file mode 100644 index 000000000..6822ccbb4 --- /dev/null +++ b/pkgs/graylog/plugins.nix @@ -0,0 +1,266 @@ +{ pkgs, lib, stdenv, fetchurl, unzip, graylog }: + +with pkgs.lib; + +let + glPlugin = a@{ + pluginName, + version, + installPhase ? '' + mkdir -p $out/bin + cp $src $out/bin/${pluginName}-${version}.jar + '', + ... + }: + stdenv.mkDerivation (a // { + inherit installPhase; + dontUnpack = true; + nativeBuildInputs = [ unzip ]; + }); +in { + aggregates = glPlugin rec { + name = "graylog-aggregates-${version}"; + pluginName = "graylog-plugin-aggregates"; + version = "2.4.0"; + src = fetchurl { + url = "https://github.com/cvtienhoven/${pluginName}/releases/download/${version}/${pluginName}-${version}.jar"; + sha256 = "1c48almnjr0b6nvzagnb9yddqbcjs7yhrd5yc5fx9q7w3vxi50zp"; + }; + meta = { + homepage = "https://github.com/cvtienhoven/graylog-plugin-aggregates"; + description = "A plugin that enables users to execute term searches and get notified when the given criteria are met"; + }; + }; + auth_sso = glPlugin rec { + name = "graylog-auth-sso-${version}"; + pluginName = "graylog-plugin-auth-sso"; + version = "3.3.0"; + src = fetchurl { + url = "https://github.com/Graylog2/${pluginName}/releases/download/${version}/${pluginName}-${version}.jar"; + sha256 = "1g47hlld8vzicd47b5i9n2816rbrhv18vjq8gp765c7mdg4a2jn8"; + }; + meta = { + homepage = "https://github.com/Graylog2/graylog-plugin-auth-sso"; + description = "SSO support for Graylog through trusted HTTP headers set by load balancers or authentication proxies"; + }; + }; + dnsresolver = glPlugin rec { + name = "graylog-dnsresolver-${version}"; + pluginName = "graylog-plugin-dnsresolver"; + version = "1.2.0"; + src = fetchurl { + url = "https://github.com/graylog-labs/${pluginName}/releases/download/${version}/${pluginName}-${version}.jar"; + sha256 = "0djlyd4w4mrrqfbrs20j1xw0fygqsb81snz437v9bf80avmcyzg1"; + }; + meta = { + homepage = "https://github.com/graylog-labs/graylog-plugin-dnsresolver"; + description = "Message filter plugin can be used to do DNS lookups for the source field in Graylog messages"; + }; + }; + enterprise-integrations = glPlugin rec { + name = "graylog-enterprise-integrations-${version}"; + pluginName = "graylog-plugin-enterprise-integrations"; + version = "3.3.9"; + src = fetchurl { + url = 
"https://downloads.graylog.org/releases/graylog-enterprise-integrations/graylog-enterprise-integrations-plugins-${version}.tgz"; + sha256 = "0yr2lmf50w8qw5amimmym6y4jxga4d7s7cbiqs5sqzvipgsknbwj"; + }; + installPhase = '' + mkdir -p $out/bin + tar --strip-components=2 -xf $src + cp ${pluginName}-${version}.jar $out/bin/${pluginName}-${version}.jar + ''; + meta = { + homepage = "https://docs.graylog.org/en/3.3/pages/integrations.html#enterprise"; + description = "Integrations are tools that help Graylog work with external systems (unfree enterprise integrations)"; + license = lib.licenses.unfree; + }; + }; + filter-messagesize = glPlugin rec { + name = "graylog-filter-messagesize-${version}"; + pluginName = "graylog-plugin-filter-messagesize"; + version = "0.0.2"; + src = fetchurl { + url = "https://github.com/graylog-labs/${pluginName}/releases/download/${version}/${pluginName}-${version}.jar"; + sha256 = "1vx62yikd6d3lbwsfiyf9j6kx8drvn4xhffwv27fw5jzhfqr61ji"; + }; + meta = { + homepage = "https://github.com/graylog-labs/graylog-plugin-filter-messagesize"; + description = "Prints out all messages that have an estimated size crossing a configured threshold during processing"; + }; + }; + integrations = glPlugin rec { + name = "graylog-integrations-${version}"; + pluginName = "graylog-plugin-integrations"; + version = "3.3.9"; + src = fetchurl { + url = "https://downloads.graylog.org/releases/graylog-integrations/graylog-integrations-plugins-${version}.tgz"; + sha256 = "0q858ffmkinngyqqsaszcrx93zc4fg43ny0xb7vm0p4wd48hjyqc"; + }; + installPhase = '' + mkdir -p $out/bin + tar --strip-components=2 -xf $src + cp ${pluginName}-${version}.jar $out/bin/${pluginName}-${version}.jar + ''; + meta = { + homepage = "https://github.com/Graylog2/graylog-plugin-integrations"; + description = "A collection of open source Graylog integrations that will be released together"; + }; + }; + internal-logs = glPlugin rec { + name = "graylog-internal-logs-${version}"; + pluginName = "graylog-plugin-internal-logs"; + version = "2.4.0"; + src = fetchurl { + url = "https://github.com/graylog-labs/${pluginName}/releases/download/${version}/${pluginName}-${version}.jar"; + sha256 = "1jyy0wkjapv3xv5q957xxv2pcnd4n1yivkvkvg6cx7kv1ip75xwc"; + }; + meta = { + homepage = "https://github.com/graylog-labs/graylog-plugin-internal-logs"; + description = "Graylog plugin to record internal logs of Graylog efficiently instead of sending them over the network"; + }; + }; + ipanonymizer = glPlugin rec { + name = "graylog-ipanonymizer-${version}"; + pluginName = "graylog-plugin-ipanonymizer"; + version = "1.1.2"; + src = fetchurl { + url = "https://github.com/graylog-labs/${pluginName}/releases/download/${version}/${pluginName}-${version}.jar"; + sha256 = "0hd66751hp97ddkn29s1cmjmc2h1nrp431bq7d2wq16iyxxlygri"; + }; + meta = { + homepage = "https://github.com/graylog-labs/graylog-plugin-ipanonymizer"; + description = "A graylog-server plugin that replaces the last octet of IP addresses in messages with xxx"; + }; + }; + jabber = glPlugin rec { + name = "graylog-jabber-${version}"; + pluginName = "graylog-plugin-jabber"; + version = "2.4.0"; + src = fetchurl { + url = "https://github.com/graylog-labs/${pluginName}/releases/download/${version}/${pluginName}-${version}.jar"; + sha256 = "0zy27q8y0bv7i5nypsfxad4yiw121sbwzd194jsz2w08jhk3skl5"; + }; + meta = { + homepage = "https://github.com/graylog-labs/graylog-plugin-jabber"; + description = "Jabber Alarmcallback Plugin for Graylog"; + }; + }; + metrics = glPlugin rec { + name = 
"graylog-metrics-${version}"; + pluginName = "graylog-plugin-metrics"; + version = "1.3.0"; + src = fetchurl { + url = "https://github.com/graylog-labs/${pluginName}/releases/download/${version}/${pluginName}-${version}.jar"; + sha256 = "1v1yzmqp43kxigh3fymdwki7pn21sk2ym3kk4nn4qv4zzkhz59vp"; + }; + meta = { + homepage = "https://github.com/graylog-labs/graylog-plugin-metrics"; + description = "An output plugin for integrating Graphite, Ganglia and StatsD with Graylog"; + }; + }; + mongodb-profiler = glPlugin rec { + name = "graylog-mongodb-profiler-${version}"; + pluginName = "graylog-plugin-mongodb-profiler"; + version = "2.0.1"; + src = fetchurl { + url = "https://github.com/graylog-labs/${pluginName}/releases/download/${version}/${pluginName}-${version}.jar"; + sha256 = "1hadxyawdz234lal3dq5cy3zppl7ixxviw96iallyav83xyi23i8"; + }; + meta = { + homepage = "https://github.com/graylog-labs/graylog-plugin-mongodb-profiler"; + description = "Graylog input plugin that reads MongoDB profiler data"; + }; + }; + pagerduty = glPlugin rec { + name = "graylog-pagerduty-${version}"; + pluginName = "graylog-plugin-pagerduty"; + version = "2.0.0"; + src = fetchurl { + url = "https://github.com/graylog-labs/${pluginName}/releases/download/${version}/${pluginName}-${version}.jar"; + sha256 = "0xhcwfwn7c77giwjilv7k7aijnj9azrjbjgd0r3p6wdrw970f27r"; + }; + meta = { + homepage = "https://github.com/graylog-labs/graylog-plugin-pagerduty"; + description = "An alarm callback plugin for integrating PagerDuty into Graylog"; + }; + }; + redis = glPlugin rec { + name = "graylog-redis-${version}"; + pluginName = "graylog-plugin-redis"; + version = "0.1.1"; + src = fetchurl { + url = "https://github.com/graylog-labs/${pluginName}/releases/download/${version}/${pluginName}-${version}.jar"; + sha256 = "0dfgh6w293ssagas5y0ixwn0vf54i5iv61r5p2q0rbv2da6xvhbw"; + }; + meta = { + homepage = "https://github.com/graylog-labs/graylog-plugin-redis"; + description = "Redis plugin for Graylog"; + }; + }; + slack = glPlugin rec { + name = "graylog-slack-${version}"; + pluginName = "graylog-plugin-slack"; + version = "3.1.0"; + src = fetchurl { + url = "https://github.com/graylog-labs/${pluginName}/releases/download/${version}/${pluginName}-${version}.jar"; + sha256 = "067p8g94b007gypwyyi8vb6qhwdanpk8ah57abik54vv14jxg94k"; + }; + meta = { + homepage = "https://github.com/graylog-labs/graylog-plugin-slack"; + description = "Can notify Slack or Mattermost channels about triggered alerts in Graylog (Alarm Callback)"; + }; + }; + snmp = glPlugin rec { + name = "graylog-snmp-${version}"; + pluginName = "graylog-plugin-snmp"; + version = "0.3.0"; + src = fetchurl { + url = "https://github.com/graylog-labs/${pluginName}/releases/download/${version}/${pluginName}-${version}.jar"; + sha256 = "1hkaklwzcsvqq45b98chwqxqdgnnbj4dg68agsll13yq4zx37qpp"; + }; + meta = { + homepage = "https://github.com/graylog-labs/graylog-plugin-snmp"; + description = "Graylog plugin to receive SNMP traps"; + }; + }; + spaceweather = glPlugin rec { + name = "graylog-spaceweather-${version}"; + pluginName = "graylog-plugin-spaceweather"; + version = "1.0"; + src = fetchurl { + url = "https://github.com/graylog-labs/${pluginName}/releases/download/${version}/spaceweather-input.jar"; + sha256 = "1mwqy3fhyy4zdwyrzvbr565xwf96xs9d3l70l0khmrm848xf8wz4"; + }; + meta = { + homepage = "https://github.com/graylog-labs/graylog-plugin-spaceweather"; + description = "Correlate proton density to the response time of your app and the ion temperature to your exception rate."; + }; 
+  };
+  twiliosms = glPlugin rec {
+    name = "graylog-twiliosms-${version}";
+    pluginName = "graylog-plugin-twiliosms";
+    version = "1.0.0";
+    src = fetchurl {
+      url = "https://github.com/graylog-labs/${pluginName}/releases/download/${version}/${pluginName}-${version}.jar";
+      sha256 = "0kwfv1zfj0fmxh9i6413bcsaxrn1vdwrzb6dphvg3dx27wxn1j1a";
+    };
+    meta = {
+      homepage = "https://github.com/graylog-labs/graylog-plugin-twiliosms";
+      description = "An alarm callback plugin for integrating the Twilio SMS API into Graylog";
+    };
+  };
+  twitter = glPlugin rec {
+    name = "graylog-twitter-${version}";
+    pluginName = "graylog-plugin-twitter";
+    version = "2.0.0";
+    src = fetchurl {
+      url = "https://github.com/graylog-labs/${pluginName}/releases/download/${version}/${pluginName}-${version}.jar";
+      sha256 = "1pi34swy9nzq35a823zzvqrjhb6wsg302z31vk2y656sw6ljjxyh";
+    };
+    meta = {
+      homepage = "https://github.com/graylog-labs/graylog-plugin-twitter";
+      description = "Graylog input plugin that reads Twitter messages based on keywords in realtime";
+    };
+  };
+}
diff --git a/pkgs/overlay.nix b/pkgs/overlay.nix
index 273a923ce..61bc95d4f 100644
--- a/pkgs/overlay.nix
+++ b/pkgs/overlay.nix
@@ -172,6 +172,26 @@ builtins.mapAttrs (_: patchPhps phpLogPermissionPatch) {
     preBuild = "rm -rf x-pack";
   });
 
+  elasticsearch6-oss =
+    (lib.toDerivation
+      (getClosureFromStore
+        /nix/store/gkw63x51dmnyr7v66vf713ni7b8i3z37-elasticsearch-oss-6.8.21)
+     // { version = "6.8.21"; });
+
+  graylog-3_3 =
+    (lib.toDerivation
+      (getClosureFromStore
+        /nix/store/yj365yr01p6yp2axj943b4l8ngzzxvkw-graylog-3.3.16)
+     // { version = "3.3.16"; });
+
+  graylog = self.graylog-3_3;
+
+  graylogPlugins = lib.recurseIntoAttrs (
+    super.callPackage ./graylog/plugins.nix {
+      graylog = self.graylog-3_3;
+    }
+  );
+
   # Those are specialised packages for "direct consumption" use in our LAMP roles.
 
   # PHP versions from vendored nix-phps
diff --git a/tests/loghost.nix b/tests/loghost.nix
new file mode 100644
index 000000000..3b22ff1e0
--- /dev/null
+++ b/tests/loghost.nix
@@ -0,0 +1,103 @@
+import ./make-test-python.nix ({ pkgs, lib, testlib, ... }:
+let
+  ipv4 = testlib.fcIP.srv4 1;
+  ipv6 = testlib.fcIP.srv6 1;
+  domain = "fcio.net";
+  host = "machine.${domain}";
+in {
+  name = "loghost";
+  nodes.machine =
+    { config, pkgs, ... }:
+    {
+      imports = [
+        (testlib.fcConfig { })
+      ];
+
+      environment.systemPackages = [ pkgs.tcpdump ];
+
+      virtualisation.memorySize = 6000;
+      virtualisation.qemu.options = [ "-smp 2" ];
+
+      flyingcircus.roles.loghost.enable = true;
+      networking.domain = "fcio.net";
+
+      services.telegraf.enable = true; # set in infra/fc but not in infra/testing
+
+      flyingcircus.roles.elasticsearch.heapPercentage = 30;
+      flyingcircus.services.graylog.heapPercentage = 35;
+
+      users.groups.login = {
+        members = [];
+      };
+
+      flyingcircus.encServices = [
+        { service = "loghost-server";
+          address = host;
+          ips = [ ipv4 ipv6 ];
+        }
+      ];
+      environment.etc.hosts.source = lib.mkForce (pkgs.writeText "hosts" ''
+        ${ipv4} ${host}
+        ${ipv6} ${host}
+      '');
+
+      flyingcircus.roles.graylog.publicFrontend = {
+        enable = true;
+        hostName = host;
+      };
+      flyingcircus.allowedUnfreePackageNames = [ "mongodb" ];
+
+    };
+
+  testScript = { nodes, pkgs, ... }:
+    let
+      config = nodes.machine.config;
+      sensuChecks = config.flyingcircus.services.sensu-client.checks;
+      graylogCheck = testlib.sensuCheckCmd nodes.machine "graylog_ui";
+      graylogApi = "${pkgs.fc.agent}/bin/fc-graylog --api http://${host}:9001/api get -l";
+      esConfigFile = "/srv/elasticsearch/config/elasticsearch.yml";
+    in ''
+      machine.wait_for_unit("elasticsearch.service")
+
+      with subtest("elasticsearch config should be set up for single-node mode"):
+          machine.succeed("grep 'discovery.type: single-node' ${esConfigFile}")
+          machine.succeed("grep 'discovery.zen.minimum_master_nodes: 1' ${esConfigFile}")
+
+      with subtest("elasticsearch auto_create_index should be disabled"):
+          machine.succeed("grep 'action.auto_create_index: false' ${esConfigFile}")
+
+      machine.wait_for_unit("haproxy.service")
+      machine.wait_for_unit("mongodb.service")
+      machine.wait_for_unit("graylog.service")
+      machine.wait_for_unit("nginx.service")
+
+      with subtest("elasticsearch should have a graylog index"):
+          machine.wait_until_succeeds("curl http://${host}:9200/_cat/indices?v | grep -q graylog_0")
+
+      with subtest("graylog API should respond"):
+          machine.wait_until_succeeds("${graylogApi} / | grep -q cluster_id")
+
+      with subtest("config script must create telegraf user"):
+          machine.wait_for_unit("fc-graylog-config.service")
+          machine.succeed("${graylogApi} /users | grep -q telegraf-machine")
+
+      with subtest("public HTTPS should serve graylog dashboard"):
+          machine.wait_until_succeeds("curl -k https://${host} | grep -q 'Graylog Web Interface'")
+
+      with subtest("sensu check should be green"):
+          machine.succeed("${graylogCheck}")
+
+      with subtest("sensu check should be red after shutting down graylog"):
+          machine.stop_job("graylog.service")
+          machine.wait_until_fails("${graylogApi} / | grep -q cluster_id")
+          machine.fail("${graylogCheck}")
+
+      with subtest("service user should be able to write to local config dir"):
+          machine.succeed('sudo -u graylog touch /etc/local/graylog/graylog.json')
+
+      with subtest("secret files should have correct permissions"):
+          machine.succeed("stat /etc/local/graylog/password -c %a:%U:%G | grep '660:graylog:service'")
+          machine.succeed("stat /etc/local/graylog/password_secret -c %a:%U:%G | grep '660:graylog:service'")
+          machine.succeed("stat /run/graylog/graylog.conf -c %a:%U:%G | grep '440:graylog:service'")
+    '';
+})
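
Note on the overlay hunk above: elasticsearch6-oss and graylog-3_3 are not built from source here, they are pinned to pre-built closures by store path. A minimal sketch of that pattern follows; it is illustrative only and assumes getClosureFromStore (a helper defined elsewhere in pkgs/overlay.nix) resolves the path roughly the way builtins.storePath would for an already-substitutable path.

    # Illustrative sketch, not part of the patch: pin a pre-built package from
    # a fixed store path and give it a derivation-shaped interface.
    { lib ? (import <nixpkgs> {}).lib }:
    # lib.toDerivation wraps a store path in an attrset with type, name and
    # outPath, so module options that expect a package accept it; the version
    # attribute is merged in for code that reads package.version.
    lib.toDerivation
      /nix/store/gkw63x51dmnyr7v66vf713ni7b8i3z37-elasticsearch-oss-6.8.21
    // { version = "6.8.21"; }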