From 6aa7e81e56e6574d108b41c89eca3484db4b14d4 Mon Sep 17 00:00:00 2001 From: Dmitriy Kruglov Date: Thu, 14 Nov 2024 22:43:30 +0100 Subject: [PATCH] improvement(tls-certs): collect and keep TLS/SSL artifacts Collect SSL configuration from SCT runner (node certificates/keys; CA certificate/key; etc.) and node-specific certificates from DB/loader nodes. Keep them after a test is finished, similarly to how logs are collected and published. This will facilitate root cause analysis of SCT failures caused by certificate-related issues. Closes: https://github.com/scylladb/scylla-cluster-tests/issues/9133 --- sdcm/logcollector.py | 28 ++++++++++++++++++++++++++++ sdcm/tester.py | 8 +++++++- 2 files changed, 35 insertions(+), 1 deletion(-) diff --git a/sdcm/logcollector.py b/sdcm/logcollector.py index 61f84a2c30..fc6643c8d9 100644 --- a/sdcm/logcollector.py +++ b/sdcm/logcollector.py @@ -754,6 +754,14 @@ class ScyllaLogCollector(LogCollector): CommandLog(name='system_schema_tables.log', command='cat system_schema_tables.log'), ] + + cmd = "test -f /etc/scylla/ssl_conf/{0} && cat /etc/scylla/ssl_conf/{0}" + log_entities.extend([ + FileLog(name='db.crt', command=cmd.format('db.crt')), + FileLog(name='client-facing.crt', command=cmd.format('client-facing.crt')), + FileLog(name='cqlshrc', command=cmd.format('client/cqlshrc')), + ]) + cluster_log_type = "db-cluster" cluster_dir_prefix = "db-cluster" collect_timeout = 600 @@ -839,6 +847,8 @@ class LoaderLogCollector(LogCollector): search_locally=True), FileLog(name='*latte*', search_locally=True), + FileLog(name='test.crt', + command="test -f /etc/scylla/ssl_conf/{0} && cat /etc/scylla/ssl_conf/{0}".format('test.crt')) ] def collect_logs(self, local_search_path=None) -> list[str]: @@ -1301,6 +1311,22 @@ def collect_logs(self, local_search_path: Optional[str] = None) -> list[str]: # return [s3_link] +class SSLConfCollector(BaseSCTLogCollector): + """ + Cluster SSL/TLS configuration collector. 
+ + Collects all SSL/TLS artifacts generated by the SCT runner for the test. + This includes: + - Certificates and keys for nodes and the CA + - Java truststore for c-s commands + - cqlshrc configuration file + """ + log_entities = [ + DirLog(name='ssl_conf/*', search_locally=True), + ] + cluster_log_type = 'ssl-conf' + + class Collector: # pylint: disable=too-many-instance-attributes, """Collector instance @@ -1352,6 +1378,8 @@ def __init__(self, test_id=None, test_dir=None, params=None): JepsenLogCollector: self.loader_set, ParallelTimelinesReportCollector: self.pt_report_set, } + if self.params.get('server_encrypt') or self.params.get('client_encrypt'): + self.cluster_log_collectors[SSLConfCollector] = self.sct_set @property def test_id(self): diff --git a/sdcm/tester.py b/sdcm/tester.py index b8eab86269..a77ebde542 100644 --- a/sdcm/tester.py +++ b/sdcm/tester.py @@ -11,6 +11,7 @@ # # Copyright (c) 2016 ScyllaDB # pylint: disable=too-many-lines +import shutil from collections import defaultdict from copy import deepcopy from concurrent.futures import ThreadPoolExecutor, as_completed @@ -24,6 +25,7 @@ import traceback import unittest import unittest.mock +from pathlib import Path from typing import NamedTuple, Optional, Union, List, Dict, Any from uuid import uuid4 from functools import wraps, cache @@ -82,7 +84,7 @@ from sdcm.utils.common import format_timestamp, wait_ami_available, \ download_dir_from_cloud, get_post_behavior_actions, get_testrun_status, download_encrypt_keys, rows_to_list, \ make_threads_be_daemonic_by_default, ParallelObject, clear_out_all_exit_hooks, change_default_password, \ - parse_python_thread_command + parse_python_thread_command, get_data_dir_path from sdcm.utils.cql_utils import cql_quote_if_needed from sdcm.utils.database_query_utils import PartitionsValidationAttributes, fetch_all_rows from sdcm.utils.features import is_tablets_feature_enabled @@ -669,6 +671,9 @@ def argus_collect_gemini_results(self): except Exception: # pylint: 
disable=broad-except # noqa: BLE001 self.log.warning("Error submitting gemini results to argus", exc_info=True) + def collect_ssl_conf(self): + shutil.copytree(Path(get_data_dir_path('ssl_conf')), Path(self.logdir) / 'ssl_conf', dirs_exist_ok=True) + def _init_data_validation(self): if data_validation := self.params.get('data_validation'): data_validation_params = yaml.safe_load(data_validation) @@ -2991,6 +2996,7 @@ def tearDown(self): self.monitors.update_default_time_range(self.start_time, time.time()) if self.params.get('collect_logs'): self.collect_logs() + self.collect_ssl_conf() self.clean_resources() if self.create_stats: self.update_test_with_errors()