From fd641c05acf56f306a673ce85e3df11ec73cd189 Mon Sep 17 00:00:00 2001 From: Michael Hoffmann Date: Fri, 2 Feb 2024 09:17:17 +0100 Subject: [PATCH] misc: use re2 if configured Signed-off-by: Michael Hoffmann --- Makefile | 2 ++ README.rst | 1 + journalpump/journalpump.py | 8 +++++++- journalpump/senders/base.py | 8 +++++++- journalpump/senders/elasticsearch_opensearch_sender.py | 6 +----- requirements.txt | 1 + systest/test_rsyslog.py | 8 +++++++- test/test_journalpump.py | 8 +++++++- 8 files changed, 33 insertions(+), 9 deletions(-) diff --git a/Makefile b/Makefile index abac0bd..9837690 100644 --- a/Makefile +++ b/Makefile @@ -2,6 +2,8 @@ SHELL=/bin/bash short_ver = 2.1.3 long_ver = $(shell git describe --long 2>/dev/null || echo $(short_ver)-0-unknown-g`git describe --always`) +USE_RE2=${USE_RE2} + all: py-egg PYTHON ?= python3 diff --git a/README.rst b/README.rst index bce20ee..64bf64f 100644 --- a/README.rst +++ b/README.rst @@ -370,6 +370,7 @@ Using backrefs, the message can also be restructured into a new format. } ] +Secret filters and searches can be made to use re2 as a regex engine by running journalpump with the environment variable "USE_RE2" set to any non-empty value (for example "USE_RE2=yes"). Make sure that the PyPI package "google-re2" is installed with at least version 1.1. ``secret_filter_metrics`` ( default: ``false``) Change this setting to true to emit metrics to the metrics host whenever a secret pattern is matched. diff --git a/journalpump/journalpump.py b/journalpump/journalpump.py index abbd9c3..7f20cdf 100644 --- a/journalpump/journalpump.py +++ b/journalpump/journalpump.py @@ -17,11 +17,17 @@ import fnmatch import json import logging -import re +import os import select import time import uuid +# NOTE: make sure to use google-re2 >= 1.1 if this is enabled. 
+if os.environ.get("USE_RE2"): + import re2 as re +else: + import re # type: ignore[no-redef] + _5_MB = 5 * 1024 * 1024 CHUNK_SIZE = 5000 diff --git a/journalpump/senders/base.py b/journalpump/senders/base.py index 4c7d458..267e3e8 100644 --- a/journalpump/senders/base.py +++ b/journalpump/senders/base.py @@ -2,11 +2,17 @@ from typing import Dict, Optional import logging +import os import random -import re import sys import time +# NOTE: make sure to use google-re2 >= 1.1 if this is enabled. +if os.environ.get("USE_RE2"): + import re2 as re +else: + import re # type: ignore[no-redef] + KAFKA_COMPRESSED_MESSAGE_OVERHEAD = 30 MAX_KAFKA_MESSAGE_SIZE = 1024**2 # 1 MiB diff --git a/journalpump/senders/elasticsearch_opensearch_sender.py b/journalpump/senders/elasticsearch_opensearch_sender.py index d377901..e68a1cb 100644 --- a/journalpump/senders/elasticsearch_opensearch_sender.py +++ b/journalpump/senders/elasticsearch_opensearch_sender.py @@ -10,7 +10,6 @@ import enum import json -import re import time @@ -72,8 +71,6 @@ def create(*, sender_type: SenderType, config: Dict[str, Any]) -> "Config": class _EsOsLogSenderBase(LogSender): _DEFAULT_MAX_SENDER_INTERVAL = 10.0 - _INDICIES_URL_REDACTION_REGEXP = r"(\w*?://[A-Za-z0-9\-._~%!$&'()*+,;=]*)(:)([A-Za-z0-9\-._~%!$&'()*+,;=]*)(@)" - _ONE_HOUR_LAST_INDEX_CHECK = 3600 _SUCCESS_HTTP_STATUSES = {HTTPStatus.OK, HTTPStatus.CREATED} @@ -173,8 +170,7 @@ def send_messages(self, *, messages, cursor) -> bool: try: es_available = self._load_indices() if not es_available: - redacted_url = re.sub(self._INDICIES_URL_REDACTION_REGEXP, r"\1\2[REDACTED]\4", self._indices_url) - self.log.warning("Waiting for connection to %s for %s", redacted_url, self.name) + self.log.warning("Waiting for connection for %s", self.name) self._backoff() return False for msg in messages: diff --git a/requirements.txt b/requirements.txt index f175b49..29dcecf 100644 --- a/requirements.txt +++ b/requirements.txt @@ -9,3 +9,4 @@ google-auth geoip2 
https://github.com/systemd/python-systemd/zipball/master typing-extensions +google-re2 diff --git a/systest/test_rsyslog.py b/systest/test_rsyslog.py index b3da4eb..d2489df 100644 --- a/systest/test_rsyslog.py +++ b/systest/test_rsyslog.py @@ -13,11 +13,17 @@ import logging.handlers import os import random -import re import socket import string import threading +# NOTE: make sure to use google-re2 >= 1.1 if this is enabled. +if os.environ.get("USE_RE2"): + import re2 as re +else: + import re # type: ignore[no-redef] + + RSYSLOGD = "/usr/sbin/rsyslogd" RSYSLOGD_TCP_CONF = """ diff --git a/test/test_journalpump.py b/test/test_journalpump.py index f0df82f..dbcd63f 100644 --- a/test/test_journalpump.py +++ b/test/test_journalpump.py @@ -29,10 +29,16 @@ import botocore.session import json +import os import pytest -import re import responses +# NOTE: make sure to use google-re2 >= 1.1 if this is enabled. +if os.environ.get("USE_RE2"): + import re2 as re +else: + import re # type: ignore[no-redef] + def test_journalpump_init(tmpdir): # pylint: disable=too-many-statements # Logplex sender