From fde34d4a8717c0f4cf09853cc8764567f0faa37f Mon Sep 17 00:00:00 2001
From: Felix Kunde
Date: Thu, 19 Sep 2024 11:46:02 +0200
Subject: [PATCH] add extra tags for uploaded Postgres log files (#1024)

---
 ENVIRONMENT.rst                                   | 1 +
 postgres-appliance/scripts/configure_spilo.py     | 9 ++++++++-
 postgres-appliance/scripts/upload_pg_log_to_s3.py | 4 +++-
 3 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/ENVIRONMENT.rst b/ENVIRONMENT.rst
index 6875fa68b..158b9a868 100644
--- a/ENVIRONMENT.rst
+++ b/ENVIRONMENT.rst
@@ -90,6 +90,7 @@ Environment Configuration Settings
 - **AZURE_TENANT_ID**: (optional) Tenant ID of the Service Principal
 - **CALLBACK_SCRIPT**: the callback script to run on various cluster actions (on start, on stop, on restart, on role change). The script will receive the cluster name, connection string and the current action. See `Patroni `__ documentation for details.
 - **LOG_S3_BUCKET**: path to the S3 bucket used for PostgreSQL daily log files (i.e. foobar, without `s3://` prefix). Spilo will add `/spilo/{LOG_BUCKET_SCOPE_PREFIX}{SCOPE}{LOG_BUCKET_SCOPE_SUFFIX}/log/` to that path. Logs are shipped if this variable is set.
+- **LOG_S3_TAGS**: map of key value pairs to be used for tagging files uploaded to S3. Values should be referencing existing environment variables e.g. ``{"ClusterName": "SCOPE", "Namespace": "POD_NAMESPACE"}``
 - **LOG_SHIP_SCHEDULE**: cron schedule for shipping compressed logs from ``pg_log`` (if this feature is enabled, '00 02 * * *' by default)
 - **LOG_ENV_DIR**: directory to store environment variables necessary for log shipping
 - **LOG_TMPDIR**: directory to store temporary compressed daily log files. PGROOT/../tmp by default.
diff --git a/postgres-appliance/scripts/configure_spilo.py b/postgres-appliance/scripts/configure_spilo.py
index 3195327fb..eb4f5e1db 100755
--- a/postgres-appliance/scripts/configure_spilo.py
+++ b/postgres-appliance/scripts/configure_spilo.py
@@ -583,6 +583,7 @@ def get_placeholders(provider):
     placeholders.setdefault('LOG_SHIP_SCHEDULE', '1 0 * * *')
     placeholders.setdefault('LOG_S3_BUCKET', '')
     placeholders.setdefault('LOG_S3_ENDPOINT', '')
+    placeholders.setdefault('LOG_S3_TAGS', '{}')
     placeholders.setdefault('LOG_TMPDIR',
                             os.path.abspath(os.path.join(placeholders['PGROOT'], '../tmp')))
     placeholders.setdefault('LOG_BUCKET_SCOPE_SUFFIX', '')
@@ -771,7 +772,13 @@ def write_log_environment(placeholders):
     if not os.path.exists(log_env['LOG_ENV_DIR']):
         os.makedirs(log_env['LOG_ENV_DIR'])
 
-    for var in ('LOG_TMPDIR', 'LOG_AWS_REGION', 'LOG_S3_ENDPOINT', 'LOG_S3_KEY', 'LOG_S3_BUCKET', 'PGLOG'):
+    for var in ('LOG_TMPDIR',
+                'LOG_AWS_REGION',
+                'LOG_S3_ENDPOINT',
+                'LOG_S3_KEY',
+                'LOG_S3_BUCKET',
+                'LOG_S3_TAGS',
+                'PGLOG'):
         write_file(log_env[var], os.path.join(log_env['LOG_ENV_DIR'], var), True)
diff --git a/postgres-appliance/scripts/upload_pg_log_to_s3.py b/postgres-appliance/scripts/upload_pg_log_to_s3.py
index dd6e1fd83..3bf5cda75 100755
--- a/postgres-appliance/scripts/upload_pg_log_to_s3.py
+++ b/postgres-appliance/scripts/upload_pg_log_to_s3.py
@@ -53,9 +53,11 @@ def upload_to_s3(local_file_path):
     chunk_size = 52428800  # 50 MiB
     config = TransferConfig(multipart_threshold=chunk_size, multipart_chunksize=chunk_size)
 
+    tags = eval(os.getenv('LOG_S3_TAGS'))
+    s3_tags_str = "&".join(f"{key}={os.getenv(value)}" for key, value in tags.items())
     try:
-        bucket.upload_file(local_file_path, key_name, Config=config)
+        bucket.upload_file(local_file_path, key_name, Config=config, ExtraArgs={'Tagging': s3_tags_str})
     except S3UploadFailedError as e:
         logger.exception('Failed to upload the %s to the bucket %s under the key %s. Exception: %r',
                          local_file_path, bucket_name, key_name, e)
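
Note on the last hunk: the script parses LOG_S3_TAGS with eval(), which executes
arbitrary Python read from an environment variable. The documented value, e.g.
{"ClusterName": "SCOPE", "Namespace": "POD_NAMESPACE"}, is also valid JSON, so
json.loads would accept the same input without that risk. The sketch below is a
minimal standalone illustration of the tag-string construction, not part of this
patch: the helper name build_s3_tagging is hypothetical, and unlike the raw
"&".join in the patch (which assumes tag names and values contain no reserved
characters) it percent-encodes keys and values with urllib.parse.urlencode,
since boto3 passes the Tagging value through as a URL query string.

    import json
    import os
    from urllib.parse import urlencode


    def build_s3_tagging(raw_tags: str) -> str:
        """Turn a LOG_S3_TAGS-style map into an S3 'Tagging' query string.

        Each value in the map names another environment variable whose
        current value becomes the tag value, mirroring the patch above
        but using json.loads instead of eval (hypothetical helper).
        """
        tags = json.loads(raw_tags or '{}')
        # Resolve each referenced environment variable; unset ones become ''.
        resolved = {key: os.getenv(value, '') for key, value in tags.items()}
        # urlencode yields 'k1=v1&k2=v2' with percent-encoding applied.
        return urlencode(resolved)


    # Usage example with made-up values:
    os.environ['SCOPE'] = 'demo-cluster'
    os.environ['POD_NAMESPACE'] = 'default'
    print(build_s3_tagging('{"ClusterName": "SCOPE", "Namespace": "POD_NAMESPACE"}'))
    # ClusterName=demo-cluster&Namespace=default

The resulting string is what the upload call passes as ExtraArgs={'Tagging': ...};
'Tagging' is one of the upload arguments the boto3 S3 transfer API accepts, so the
tags are applied to the object in the same PUT that uploads the log file.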