From 13f0a6e842acc603c68896a3f2cbaf18a3ca454e Mon Sep 17 00:00:00 2001
From: Felix Kunde
Date: Wed, 4 Sep 2024 16:11:38 +0200
Subject: [PATCH 1/7] add extra tags for uploaded Postgres log files

---
 .../scripts/upload_pg_log_to_s3.py | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/postgres-appliance/scripts/upload_pg_log_to_s3.py b/postgres-appliance/scripts/upload_pg_log_to_s3.py
index dd6e1fd83..df5a66d08 100755
--- a/postgres-appliance/scripts/upload_pg_log_to_s3.py
+++ b/postgres-appliance/scripts/upload_pg_log_to_s3.py
@@ -7,6 +7,7 @@
 import subprocess
 import sys
 import time
+from croniter import croniter
 
 from datetime import datetime, timedelta
 
@@ -16,12 +17,21 @@
 logger = logging.getLogger(__name__)
 
 
-def compress_pg_log():
+def generate_file_name():
+    schedule = os.getenv('LOG_SHIP_SCHEDULE')
+    itr = croniter(schedule, datetime.now() - timedelta(minutes=1))
+    prev_log = itr.get_prev(datetime)
     yesterday = datetime.now() - timedelta(days=1)
     yesterday_day_number = yesterday.strftime('%u')
     log_file = os.path.join(os.getenv('PGLOG'), 'postgresql-' + yesterday_day_number + '.csv')
-    archived_log_file = os.path.join(os.getenv('LOG_TMPDIR'), yesterday.strftime('%F') + '.csv.gz')
+    archived_log_file = os.path.join(os.getenv('LOG_TMPDIR'), prev_log.strftime('%F') + '.csv.gz')
+
+    return log_file, archived_log_file
+
+
+def compress_pg_log():
+    log_file, archived_log_file = generate_file_name()
 
     if os.path.getsize(log_file) == 0:
         logger.warning("Postgres log from yesterday '%s' is empty.", log_file)
@@ -53,9 +63,10 @@
 
     chunk_size = 52428800  # 50 MiB
     config = TransferConfig(multipart_threshold=chunk_size, multipart_chunksize=chunk_size)
+    tags = {'LogEndpoint': os.getenv('LOG_S3_ENDPOINT'), 'Namespace': os.getenv('POD_NAMESPACE'), 'ClusterName': os.getenv('SCOPE')}
 
     try:
-        bucket.upload_file(local_file_path, key_name, Config=config)
+        bucket.upload_file(local_file_path, key_name, Config=config, ExtraArgs=tags)
     except S3UploadFailedError as e:
         logger.exception('Failed to upload the %s to the bucket %s under the key %s. Exception: %r',
                          local_file_path, bucket_name, key_name, e)
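A minimal sketch of the croniter lookup this patch relies on, assuming the croniter package is installed; the schedule string and the print call are illustrative, not part of the patch:

    from datetime import datetime, timedelta

    from croniter import croniter

    # Stepping back one minute lets a run that fires exactly on schedule
    # resolve to the previous scheduled run rather than the current one.
    schedule = '00 02 * * *'  # illustrative LOG_SHIP_SCHEDULE value
    itr = croniter(schedule, datetime.now() - timedelta(minutes=1))
    prev_log = itr.get_prev(datetime)  # passing the class returns a datetime
    print(prev_log.strftime('%F'))     # ISO date used to name the .csv.gz archive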
From 6567859996f51649a2b0d27612cf61a8e918de65 Mon Sep 17 00:00:00 2001
From: Felix Kunde
Date: Wed, 4 Sep 2024 16:22:03 +0200
Subject: [PATCH 2/7] make tags pretty print

---
 postgres-appliance/scripts/upload_pg_log_to_s3.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/postgres-appliance/scripts/upload_pg_log_to_s3.py b/postgres-appliance/scripts/upload_pg_log_to_s3.py
index df5a66d08..7e43573b3 100755
--- a/postgres-appliance/scripts/upload_pg_log_to_s3.py
+++ b/postgres-appliance/scripts/upload_pg_log_to_s3.py
@@ -63,7 +63,11 @@ def upload_to_s3(local_file_path):
 
     chunk_size = 52428800  # 50 MiB
     config = TransferConfig(multipart_threshold=chunk_size, multipart_chunksize=chunk_size)
-    tags = {'LogEndpoint': os.getenv('LOG_S3_ENDPOINT'), 'Namespace': os.getenv('POD_NAMESPACE'), 'ClusterName': os.getenv('SCOPE')}
+    tags = {
+        'LogEndpoint': os.getenv('LOG_S3_ENDPOINT'),
+        'Namespace': os.getenv('POD_NAMESPACE'),
+        'ClusterName': os.getenv('SCOPE')
+    }
 
     try:
         bucket.upload_file(local_file_path, key_name, Config=config, ExtraArgs=tags)

From 9d1b54f062ec9d3e9770d2bf458cab896f4b3105 Mon Sep 17 00:00:00 2001
From: Felix Kunde
Date: Thu, 5 Sep 2024 07:14:01 +0200
Subject: [PATCH 3/7] fix tagging

---
 postgres-appliance/scripts/upload_pg_log_to_s3.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/postgres-appliance/scripts/upload_pg_log_to_s3.py b/postgres-appliance/scripts/upload_pg_log_to_s3.py
index 7e43573b3..61c6e19b9 100755
--- a/postgres-appliance/scripts/upload_pg_log_to_s3.py
+++ b/postgres-appliance/scripts/upload_pg_log_to_s3.py
@@ -64,13 +64,13 @@ def upload_to_s3(local_file_path):
     chunk_size = 52428800  # 50 MiB
     config = TransferConfig(multipart_threshold=chunk_size, multipart_chunksize=chunk_size)
     tags = {
-        'LogEndpoint': os.getenv('LOG_S3_ENDPOINT'),
         'Namespace': os.getenv('POD_NAMESPACE'),
         'ClusterName': os.getenv('SCOPE')
     }
+    tags_str = "&".join(f"{key}={value}" for key, value in tags.items())
 
     try:
-        bucket.upload_file(local_file_path, key_name, Config=config, ExtraArgs=tags)
+        bucket.upload_file(local_file_path, key_name, Config=config, ExtraArgs={'Tagging': tags_str})
     except S3UploadFailedError as e:
         logger.exception('Failed to upload the %s to the bucket %s under the key %s. Exception: %r',
                          local_file_path, bucket_name, key_name, e)
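For context on the fix above: boto3 validates ExtraArgs against a whitelist, so the raw tag dict from patch 1 is rejected, and the accepted 'Tagging' entry expects a URL-encoded query string ("key1=value1&key2=value2"), which is what the "&".join(...) assembles. A hedged sketch, with illustrative values, of building the same string via urllib.parse.urlencode, which additionally escapes characters that cannot appear raw in a query string:

    from urllib.parse import urlencode

    # urlencode emits the "key1=value1&key2=value2" format expected by
    # the 'Tagging' upload argument and escapes unsafe characters.
    tags = {'Namespace': 'default', 'ClusterName': 'demo cluster'}  # illustrative
    tags_str = urlencode(tags)
    print(tags_str)  # Namespace=default&ClusterName=demo+cluster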
From 896c89ccf709643986c8b1c5719c78be8d9e9d2c Mon Sep 17 00:00:00 2001
From: Felix Kunde
Date: Fri, 6 Sep 2024 14:12:25 +0200
Subject: [PATCH 4/7] introduce new env variable to specify S3 tags

---
 ENVIRONMENT.rst                               |  1 +
 postgres-appliance/scripts/configure_spilo.py |  3 ++-
 .../scripts/upload_pg_log_to_s3.py            | 27 +++++++------------
 3 files changed, 12 insertions(+), 19 deletions(-)

diff --git a/ENVIRONMENT.rst b/ENVIRONMENT.rst
index 6875fa68b..c2d8cd459 100644
--- a/ENVIRONMENT.rst
+++ b/ENVIRONMENT.rst
@@ -90,6 +90,7 @@ Environment Configuration Settings
 - **AZURE_TENANT_ID**: (optional) Tenant ID of the Service Principal
 - **CALLBACK_SCRIPT**: the callback script to run on various cluster actions (on start, on stop, on restart, on role change). The script will receive the cluster name, connection string and the current action. See `Patroni `__ documentation for details.
 - **LOG_S3_BUCKET**: path to the S3 bucket used for PostgreSQL daily log files (i.e. foobar, without `s3://` prefix). Spilo will add `/spilo/{LOG_BUCKET_SCOPE_PREFIX}{SCOPE}{LOG_BUCKET_SCOPE_SUFFIX}/log/` to that path. Logs are shipped if this variable is set.
+- **LOG_S3_TAGS**: map of key-value pairs used for tagging files uploaded to S3. Values should reference existing environment variables.
 - **LOG_SHIP_SCHEDULE**: cron schedule for shipping compressed logs from ``pg_log`` (if this feature is enabled, '00 02 * * *' by default)
 - **LOG_ENV_DIR**: directory to store environment variables necessary for log shipping
 - **LOG_TMPDIR**: directory to store temporary compressed daily log files. PGROOT/../tmp by default.
diff --git a/postgres-appliance/scripts/configure_spilo.py b/postgres-appliance/scripts/configure_spilo.py
index 3195327fb..66c1deb51 100755
--- a/postgres-appliance/scripts/configure_spilo.py
+++ b/postgres-appliance/scripts/configure_spilo.py
@@ -583,6 +583,7 @@ def get_placeholders(provider):
     placeholders.setdefault('LOG_SHIP_SCHEDULE', '1 0 * * *')
     placeholders.setdefault('LOG_S3_BUCKET', '')
     placeholders.setdefault('LOG_S3_ENDPOINT', '')
+    placeholders.setdefault('LOG_S3_TAGS', '{}')
     placeholders.setdefault('LOG_TMPDIR', os.path.abspath(os.path.join(placeholders['PGROOT'], '../tmp')))
     placeholders.setdefault('LOG_BUCKET_SCOPE_SUFFIX', '')
@@ -771,7 +772,7 @@ def write_log_environment(placeholders):
     if not os.path.exists(log_env['LOG_ENV_DIR']):
         os.makedirs(log_env['LOG_ENV_DIR'])
 
-    for var in ('LOG_TMPDIR', 'LOG_AWS_REGION', 'LOG_S3_ENDPOINT', 'LOG_S3_KEY', 'LOG_S3_BUCKET', 'PGLOG'):
+    for var in ('LOG_TMPDIR', 'LOG_AWS_REGION', 'LOG_S3_ENDPOINT', 'LOG_S3_KEY', 'LOG_S3_BUCKET', 'LOG_S3_TAGS', 'PGLOG'):
         write_file(log_env[var], os.path.join(log_env['LOG_ENV_DIR'], var), True)
diff --git a/postgres-appliance/scripts/upload_pg_log_to_s3.py b/postgres-appliance/scripts/upload_pg_log_to_s3.py
index 61c6e19b9..39bcc6ef1 100755
--- a/postgres-appliance/scripts/upload_pg_log_to_s3.py
+++ b/postgres-appliance/scripts/upload_pg_log_to_s3.py
@@ -7,7 +7,6 @@
 import subprocess
 import sys
 import time
-from croniter import croniter
 
 from datetime import datetime, timedelta
 
@@ -17,21 +16,12 @@
 logger = logging.getLogger(__name__)
 
 
-def generate_file_name():
-    schedule = os.getenv('LOG_SHIP_SCHEDULE')
-    itr = croniter(schedule, datetime.now() - timedelta(minutes=1))
-    prev_log = itr.get_prev(datetime)
+def compress_pg_log():
     yesterday = datetime.now() - timedelta(days=1)
     yesterday_day_number = yesterday.strftime('%u')
     log_file = os.path.join(os.getenv('PGLOG'), 'postgresql-' + yesterday_day_number + '.csv')
-    archived_log_file = os.path.join(os.getenv('LOG_TMPDIR'), prev_log.strftime('%F') + '.csv.gz')
-
-    return log_file, archived_log_file
-
-
-def compress_pg_log():
-    log_file, archived_log_file = generate_file_name()
+    archived_log_file = os.path.join(os.getenv('LOG_TMPDIR'), yesterday.strftime('%F') + '.csv.gz')
 
     if os.path.getsize(log_file) == 0:
         logger.warning("Postgres log from yesterday '%s' is empty.", log_file)
@@ -63,14 +53,15 @@ def upload_to_s3(local_file_path):
 
     chunk_size = 52428800  # 50 MiB
     config = TransferConfig(multipart_threshold=chunk_size, multipart_chunksize=chunk_size)
-    tags = {
-        'Namespace': os.getenv('POD_NAMESPACE'),
-        'ClusterName': os.getenv('SCOPE')
-    }
-    tags_str = "&".join(f"{key}={value}" for key, value in tags.items())
+    tags = eval(os.getenv('LOG_S3_TAGS'))
+    s3_tags = {}
+    for key, value in tags.items():
+        s3_tags[key] = os.getenv(value)
+
+    s3_tags_str = "&".join(f"{key}={value}" for key, value in s3_tags.items())
 
     try:
-        bucket.upload_file(local_file_path, key_name, Config=config, ExtraArgs={'Tagging': tags_str})
+        bucket.upload_file(local_file_path, key_name, Config=config, ExtraArgs={'Tagging': s3_tags_str})
     except S3UploadFailedError as e:
         logger.exception('Failed to upload the %s to the bucket %s under the key %s. Exception: %r',
                          local_file_path, bucket_name, key_name, e)
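The eval() call above executes whatever string LOG_S3_TAGS contains. A hedged alternative sketch, not what the patch ships, using ast.literal_eval, which parses the same mapping but rejects anything that is not a plain Python literal:

    import ast
    import os

    # literal_eval accepts only literals (dicts, strings, numbers, ...),
    # so arbitrary code placed in LOG_S3_TAGS cannot be executed.
    tags = ast.literal_eval(os.getenv('LOG_S3_TAGS', '{}'))
    s3_tags = {key: os.getenv(value) for key, value in tags.items()}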
From 97e3dcb4bdedf9edf6bbea249daf7607dcd28b64 Mon Sep 17 00:00:00 2001
From: Felix Kunde
Date: Fri, 6 Sep 2024 14:23:16 +0200
Subject: [PATCH 5/7] fix flake err

---
 postgres-appliance/scripts/configure_spilo.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/postgres-appliance/scripts/configure_spilo.py b/postgres-appliance/scripts/configure_spilo.py
index 66c1deb51..eb4f5e1db 100755
--- a/postgres-appliance/scripts/configure_spilo.py
+++ b/postgres-appliance/scripts/configure_spilo.py
@@ -772,7 +772,13 @@ def write_log_environment(placeholders):
     if not os.path.exists(log_env['LOG_ENV_DIR']):
         os.makedirs(log_env['LOG_ENV_DIR'])
 
-    for var in ('LOG_TMPDIR', 'LOG_AWS_REGION', 'LOG_S3_ENDPOINT', 'LOG_S3_KEY', 'LOG_S3_BUCKET', 'LOG_S3_TAGS', 'PGLOG'):
+    for var in ('LOG_TMPDIR',
+                'LOG_AWS_REGION',
+                'LOG_S3_ENDPOINT',
+                'LOG_S3_KEY',
+                'LOG_S3_BUCKET',
+                'LOG_S3_TAGS',
+                'PGLOG'):
         write_file(log_env[var], os.path.join(log_env['LOG_ENV_DIR'], var), True)

From 36b016d75843ef1ddd3d25a3b7bc57ead933ddfc Mon Sep 17 00:00:00 2001
From: Felix Kunde
Date: Tue, 17 Sep 2024 15:41:06 +0200
Subject: [PATCH 6/7] reflect feedback

---
 postgres-appliance/scripts/upload_pg_log_to_s3.py | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/postgres-appliance/scripts/upload_pg_log_to_s3.py b/postgres-appliance/scripts/upload_pg_log_to_s3.py
index 39bcc6ef1..3bf5cda75 100755
--- a/postgres-appliance/scripts/upload_pg_log_to_s3.py
+++ b/postgres-appliance/scripts/upload_pg_log_to_s3.py
@@ -54,11 +54,7 @@ def upload_to_s3(local_file_path):
 
     chunk_size = 52428800  # 50 MiB
     config = TransferConfig(multipart_threshold=chunk_size, multipart_chunksize=chunk_size)
     tags = eval(os.getenv('LOG_S3_TAGS'))
-    s3_tags = {}
-    for key, value in tags.items():
-        s3_tags[key] = os.getenv(value)
-
-    s3_tags_str = "&".join(f"{key}={value}" for key, value in s3_tags.items())
+    s3_tags_str = "&".join(f"{key}={os.getenv(value)}" for key, value in tags.items())
 
     try:
         bucket.upload_file(local_file_path, key_name, Config=config, ExtraArgs={'Tagging': s3_tags_str})
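One edge case in the inlined expression above: os.getenv(value) returns None for an unset variable, and the f-string then renders the literal tag value "None". A hedged sketch, with an illustrative mapping, that skips such entries instead:

    import os

    tags = {'ClusterName': 'SCOPE', 'Namespace': 'POD_NAMESPACE'}  # illustrative
    # Drop tags whose referenced environment variable is not set,
    # rather than emitting "key=None".
    s3_tags_str = "&".join(f"{key}={os.getenv(value)}"
                           for key, value in tags.items()
                           if os.getenv(value) is not None)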
From 061f74be2ddde87f11c10ae5003ca03fca6fcfbe Mon Sep 17 00:00:00 2001
From: Felix Kunde
Date: Wed, 18 Sep 2024 16:35:54 +0200
Subject: [PATCH 7/7] provide LOG_S3_TAGS example

---
 ENVIRONMENT.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ENVIRONMENT.rst b/ENVIRONMENT.rst
index c2d8cd459..158b9a868 100644
--- a/ENVIRONMENT.rst
+++ b/ENVIRONMENT.rst
@@ -90,7 +90,7 @@ Environment Configuration Settings
 - **AZURE_TENANT_ID**: (optional) Tenant ID of the Service Principal
 - **CALLBACK_SCRIPT**: the callback script to run on various cluster actions (on start, on stop, on restart, on role change). The script will receive the cluster name, connection string and the current action. See `Patroni `__ documentation for details.
 - **LOG_S3_BUCKET**: path to the S3 bucket used for PostgreSQL daily log files (i.e. foobar, without `s3://` prefix). Spilo will add `/spilo/{LOG_BUCKET_SCOPE_PREFIX}{SCOPE}{LOG_BUCKET_SCOPE_SUFFIX}/log/` to that path. Logs are shipped if this variable is set.
-- **LOG_S3_TAGS**: map of key-value pairs used for tagging files uploaded to S3. Values should reference existing environment variables.
+- **LOG_S3_TAGS**: map of key-value pairs used for tagging files uploaded to S3. Values should reference existing environment variables, e.g. ``{"ClusterName": "SCOPE", "Namespace": "POD_NAMESPACE"}``
 - **LOG_SHIP_SCHEDULE**: cron schedule for shipping compressed logs from ``pg_log`` (if this feature is enabled, '00 02 * * *' by default)
 - **LOG_ENV_DIR**: directory to store environment variables necessary for log shipping
 - **LOG_TMPDIR**: directory to store temporary compressed daily log files. PGROOT/../tmp by default.
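To illustrate the documented example end to end, a small sketch of how upload_pg_log_to_s3.py resolves the mapping; the environment values are illustrative:

    import os

    os.environ['SCOPE'] = 'demo-cluster'     # illustrative values
    os.environ['POD_NAMESPACE'] = 'default'
    tags = {"ClusterName": "SCOPE", "Namespace": "POD_NAMESPACE"}

    # Same expression the upload script uses after patch 6:
    s3_tags_str = "&".join(f"{key}={os.getenv(value)}" for key, value in tags.items())
    print(s3_tags_str)  # ClusterName=demo-cluster&Namespace=default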