From 48cb7aa608a7a1ef5304fb510361b70832410f7a Mon Sep 17 00:00:00 2001 From: Vasileios Karakasis Date: Tue, 12 Nov 2024 17:25:34 +0100 Subject: [PATCH 1/5] Allow partition/environment extras to be used also as feature constraints --- docs/config_reference.rst | 4 +++- reframe/core/pipeline.py | 11 +++++++++-- reframe/core/runtime.py | 11 +++++++---- unittests/test_pipeline.py | 39 ++++++++++++++++++++++++++++++++------ 4 files changed, 52 insertions(+), 13 deletions(-) diff --git a/docs/config_reference.rst b/docs/config_reference.rst index 04d6959d60..cd7d8e510a 100644 --- a/docs/config_reference.rst +++ b/docs/config_reference.rst @@ -601,7 +601,7 @@ System Partition Configuration :required: No :default: ``[]`` - A list of job scheduler `resource specification <#custom-job-scheduler-resources>`__ objects. + A list of job scheduler :ref:`resource specification ` objects. .. py:attribute:: systems.partitions.processor @@ -747,6 +747,8 @@ ReFrame can launch containerized applications, but you need to configure properl If specified in conjunction with :attr:`~systems.partitions.container_platforms.env_vars`, it will be ignored. +.. _scheduler-resources: + Custom Job Scheduler Resources ============================== diff --git a/reframe/core/pipeline.py b/reframe/core/pipeline.py index 8c3ab5e46c..5663f6e111 100644 --- a/reframe/core/pipeline.py +++ b/reframe/core/pipeline.py @@ -261,11 +261,18 @@ def pipeline_hooks(cls): #: of the :attr:`valid_systems` list, in which case an AND operation on #: these constraints is implied. For example, the test defining the #: following will be valid for all systems that have define both ``feat1`` - #: and ``feat2`` and set ``foo=1`` + #: and ``feat2`` and set ``foo=1``: #: #: .. code-block:: python #: - #: valid_systems = ['+feat1 +feat2 %foo=1'] + #: valid_systems = [r'+feat1 +feat2 %foo=1'] + #: + #: Any partition/environment extra or + #: :ref:`partition resource ` can be specified as a + #: feature constraint without having to explicitly state this in the + #: partition's/environment's feature list. For example, if ``key1`` is part + #: of the partition/environment extras list, then ``+key1`` will select + #: that partition or environment. #: #: For key/value pairs comparisons, ReFrame will automatically convert the #: value in the key/value spec to the type of the value of the diff --git a/reframe/core/runtime.py b/reframe/core/runtime.py index 24d754b991..e3fbce980f 100644 --- a/reframe/core/runtime.py +++ b/reframe/core/runtime.py @@ -311,11 +311,13 @@ def _is_valid_part(part, valid_systems): props[key] = val have_plus_feats = all( - ft in part.features or ft in part.resources + (ft in part.features or + ft in part.resources or ft in part.extras) for ft in plus_feats ) have_minus_feats = any( - ft in part.features or ft in part.resources + (ft in part.features or + ft in part.resources or ft in part.extras) for ft in minus_feats ) try: @@ -357,8 +359,9 @@ def _is_valid_env(env, valid_prog_environs): key, val = subspec[1:].split('=') props[key] = val - have_plus_feats = all(ft in env.features for ft in plus_feats) - have_minus_feats = any(ft in env.features + have_plus_feats = all(ft in env.features or ft in env.extras + for ft in plus_feats) + have_minus_feats = any(ft in env.features or ft in env.extras for ft in minus_feats) try: have_props = True diff --git a/unittests/test_pipeline.py b/unittests/test_pipeline.py index e5010e255c..6807088b91 100644 --- a/unittests/test_pipeline.py +++ b/unittests/test_pipeline.py @@ -556,7 +556,7 @@ def _assert_supported(valid_systems, valid_prog_environs, # Check AND in features and extras _assert_supported( - valid_systems=['+cuda +mpi %gpu_arch=v100'], + valid_systems=[r'+cuda +mpi %gpu_arch=v100'], valid_prog_environs=['*'], expected={} ) @@ -566,9 +566,18 @@ def _assert_supported(valid_systems, valid_prog_environs, expected={} ) - # Check OR in features ad extras + # Check OR in features and extras _assert_supported( - valid_systems=['+cuda +mpi', '%gpu_arch=v100'], + valid_systems=['+cuda +mpi', r'%gpu_arch=v100'], + valid_prog_environs=['*'], + expected={ + 'testsys:gpu': ['PrgEnv-gnu', 'builtin'] + } + ) + + # Check that extra keys can used as features + _assert_supported( + valid_systems=['+cuda +mpi', '+gpu_arch'], valid_prog_environs=['*'], expected={ 'testsys:gpu': ['PrgEnv-gnu', 'builtin'] @@ -640,7 +649,7 @@ def _assert_supported(valid_systems, valid_prog_environs, ) _assert_supported( valid_systems=['*'], - valid_prog_environs=['%bar=x'], + valid_prog_environs=[r'%bar=x'], expected={ 'testsys:gpu': [], 'testsys:login': ['PrgEnv-gnu'] @@ -648,7 +657,7 @@ def _assert_supported(valid_systems, valid_prog_environs, ) _assert_supported( valid_systems=['*'], - valid_prog_environs=['%foo=2'], + valid_prog_environs=[r'%foo=2'], expected={ 'testsys:gpu': ['PrgEnv-gnu'], 'testsys:login': [] @@ -656,7 +665,7 @@ def _assert_supported(valid_systems, valid_prog_environs, ) _assert_supported( valid_systems=['*'], - valid_prog_environs=['%foo=bar'], + valid_prog_environs=[r'%foo=bar'], expected={ 'testsys:gpu': [], 'testsys:login': [] @@ -671,6 +680,24 @@ def _assert_supported(valid_systems, valid_prog_environs, } ) + # Check that extra keys can used as features + _assert_supported( + valid_systems=['*'], + valid_prog_environs=['+foo +bar'], + expected={ + 'testsys:gpu': ['PrgEnv-gnu'], + 'testsys:login': ['PrgEnv-gnu'] + } + ) + _assert_supported( + valid_systems=['*'], + valid_prog_environs=['+foo -bar'], + expected={ + 'testsys:gpu': [], + 'testsys:login': [] + } + ) + # Check valid_systems / valid_prog_environs combinations _assert_supported( valid_systems=['testsys:login'], From 89b0225744ae6516b5ea53ff313bb8a3be68e4b1 Mon Sep 17 00:00:00 2001 From: Vasileios Karakasis Date: Wed, 13 Nov 2024 00:53:59 +0100 Subject: [PATCH 2/5] Make directory prefix for topology files configurable --- docs/config_reference.rst | 18 +++++++++++++++++- reframe/frontend/autodetect.py | 2 +- reframe/schemas/config.json | 1 + 3 files changed, 19 insertions(+), 2 deletions(-) diff --git a/docs/config_reference.rst b/docs/config_reference.rst index 04d6959d60..b8573bfbb9 100644 --- a/docs/config_reference.rst +++ b/docs/config_reference.rst @@ -639,12 +639,19 @@ System Partition Configuration In case of errors during auto-detection, ReFrame will simply issue a warning and continue. + .. note:: + + The directory prefix for storing topology information is configurable through the :attr:`~config.general.topology_prefix` configuration option. + .. versionadded:: 3.5.0 .. versionchanged:: 3.7.0 ReFrame is now able to detect the processor information automatically. + .. versionchanged:: 4.7 + Directory prefix for topology files is now configurable. + .. py:attribute:: systems.partitions.devices @@ -1855,6 +1862,16 @@ General Configuration +.. py:attribute:: general.topology_prefix + + :required: No + :default: ``"${HOME}/.reframe/topology"`` + + Directory prefix for storing the auto-detected processor topology. + + .. versionadded:: 4.7 + + .. py:attribute:: general.trap_job_errors :required: No @@ -1864,7 +1881,6 @@ General Configuration .. versionadded:: 3.2 - .. py:attribute:: general.keep_stage_files :required: No diff --git a/reframe/frontend/autodetect.py b/reframe/frontend/autodetect.py index e78ba24cba..45cc5a4637 100644 --- a/reframe/frontend/autodetect.py +++ b/reframe/frontend/autodetect.py @@ -204,7 +204,7 @@ def _emit_custom_script(job, env, commands): def detect_topology(): rt = runtime.runtime() detect_remote_systems = rt.get_option('general/0/remote_detect') - topo_prefix = os.path.join(os.getenv('HOME'), '.reframe/topology') + topo_prefix = rt.get_option('general/0/topology_prefix') for part in rt.system.partitions: getlogger().debug(f'detecting topology info for {part.fullname}') found_procinfo = False diff --git a/reframe/schemas/config.json b/reframe/schemas/config.json index 2350ec4fbd..c1684adc8a 100644 --- a/reframe/schemas/config.json +++ b/reframe/schemas/config.json @@ -601,6 +601,7 @@ "general/save_log_files": false, "general/table_format": "pretty", "general/target_systems": ["*"], + "general/topology_prefix": "${HOME}/.reframe/topology", "general/timestamp_dirs": "%Y%m%dT%H%M%S%z", "general/trap_job_errors": false, "general/unload_modules": [], From 268f9da4e028fdd7daa1f0bb61711ea0a83b31d6 Mon Sep 17 00:00:00 2001 From: Vasileios Karakasis Date: Wed, 13 Nov 2024 10:34:17 +0100 Subject: [PATCH 3/5] Expand envvars in `topology_prefix` configuration option --- reframe/frontend/autodetect.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/reframe/frontend/autodetect.py b/reframe/frontend/autodetect.py index 45cc5a4637..6378155fbc 100644 --- a/reframe/frontend/autodetect.py +++ b/reframe/frontend/autodetect.py @@ -204,7 +204,7 @@ def _emit_custom_script(job, env, commands): def detect_topology(): rt = runtime.runtime() detect_remote_systems = rt.get_option('general/0/remote_detect') - topo_prefix = rt.get_option('general/0/topology_prefix') + topo_prefix = osext.expandvars(rt.get_option('general/0/topology_prefix')) for part in rt.system.partitions: getlogger().debug(f'detecting topology info for {part.fullname}') found_procinfo = False From e68d95331a30f9c01ce58a810f3ba5534f22e310 Mon Sep 17 00:00:00 2001 From: Vasileios Karakasis Date: Fri, 8 Nov 2024 19:16:13 +0100 Subject: [PATCH 4/5] WIP: Disable results storage by default --- examples/tutorial/scripts/runall.sh | 23 ++++++++++++++++ reframe/frontend/cli.py | 17 +++++++----- reframe/frontend/reporting/__init__.py | 36 ++++++++++++++++++++++++++ reframe/schemas/config.json | 4 +-- unittests/test_cli.py | 24 ++++++++++++----- 5 files changed, 89 insertions(+), 15 deletions(-) create mode 100644 examples/tutorial/scripts/runall.sh diff --git a/examples/tutorial/scripts/runall.sh b/examples/tutorial/scripts/runall.sh new file mode 100644 index 0000000000..9d44ee6cf9 --- /dev/null +++ b/examples/tutorial/scripts/runall.sh @@ -0,0 +1,23 @@ +#!/bin/bash + +set -xe + +export RFM_ENABLE_RESULTS_STORAGE=1 + +pushd reframe-examples/tutorial +reframe -c stream/stream_runonly.py -r +reframe -c stream/stream_runonly.py -r +reframe -C config/baseline.py -c stream/stream_runonly.py -r +reframe -C config/baseline_environs.py -c stream/stream_build_run.py --exec-policy=serial -r +reframe -C config/baseline_environs.py -c stream/stream_fixtures.py -l +reframe -C config/baseline_environs.py -c stream/stream_fixtures.py -r +reframe -C config/baseline_environs.py -c stream/stream_variables.py -S num_threads=2 -r +reframe -C config/baseline_environs.py -c stream/stream_variables_fixtures.py --exec-policy=serial -S stream_test.stream_binary.array_size=50000000 -r +reframe -C config/baseline_environs.py -c stream/stream_parameters.py --exec-policy=serial -r +reframe -C config/baseline_environs.py -c stream/stream_variables_fixtures.py -P num_threads=1,2,4,8 --exec-policy=serial -r +reframe -c deps/deps_complex.py -r +reframe --restore-session --failed -r +reframe -c deps/deps_complex.py --keep-stage-files -r +reframe --restore-session --keep-stage-files -n T6 -r +reframe -c deps/deps_complex.py -n T6 -r +popd diff --git a/reframe/frontend/cli.py b/reframe/frontend/cli.py index cdade59028..64f1b9daef 100644 --- a/reframe/frontend/cli.py +++ b/reframe/frontend/cli.py @@ -628,7 +628,7 @@ def main(): configvar='general/perf_report_spec', envvar='RFM_PERF_REPORT_SPEC', help=('Print a report for performance tests ' - '(default: "now:now/last:+job_nodelist/+result")') + '(default: "now-1d:now/last:+job_nodelist/+result")') ) reporting_options.add_argument( '--session-extras', action='append', metavar='KV_DATA', @@ -979,8 +979,7 @@ def restrict_logging(): '--describe-stored-testcases', '--list-stored-sessions', '--list-stored-testcases', - '--performance-compare', - '--performance-report']): + '--performance-compare']): sys.exit(1) rt = runtime.runtime() @@ -1655,9 +1654,12 @@ def module_unuse(*paths): if (options.performance_report and not options.dry_run and not report.is_empty()): try: - data = reporting.performance_compare( - rt.get_option('general/0/perf_report_spec'), report - ) + if rt.get_option('storage/0/enable'): + data = reporting.performance_compare( + rt.get_option('general/0/perf_report_spec'), report + ) + else: + data = report.report_data() except Exception as err: printer.warning( f'failed to generate performance report: {err}' @@ -1699,7 +1701,8 @@ def module_unuse(*paths): ) # Store the generated report for analytics - if not report.is_empty() and not options.dry_run: + if (rt.get_option('storage/0/enable') and + not report.is_empty() and not options.dry_run): try: sess_uuid = report.store() except Exception as e: diff --git a/reframe/frontend/reporting/__init__.py b/reframe/frontend/reporting/__init__.py index bac9753d52..c37c152610 100644 --- a/reframe/frontend/reporting/__init__.py +++ b/reframe/frontend/reporting/__init__.py @@ -460,6 +460,42 @@ def store(self): return StorageBackend.default().store(self, self.filename) + def report_data(self): + '''Get tabular data from this report''' + + columns = ['name', 'sysenv', 'job_nodelist', + 'pvar', 'punit', 'pval', 'result'] + data = [columns] + num_runs = len(self.__report['runs']) + for runid, runinfo in enumerate(self.__report['runs']): + for tc in map(_TCProxy, runinfo['testcases']): + if tc['result'] != 'success' and runid != num_runs - 1: + # Skip this testcase until its last retry + continue + + for pvar, reftuple in tc['perfvalues'].items(): + pvar = pvar.split(':')[-1] + pval, _, _, _, punit = reftuple + if pval is None: + # Ignore `None` performance values + # (performance tests that failed sanity) + continue + + line = [] + for c in columns: + if c == 'pvar': + line.append(pvar) + elif c == 'pval': + line.append(pval) + elif c == 'punit': + line.append(punit) + else: + line.append(tc[c]) + + data.append(line) + + return data + def generate_xml_report(self): '''Generate a JUnit report from a standard ReFrame JSON report.''' diff --git a/reframe/schemas/config.json b/reframe/schemas/config.json index c1684adc8a..e18b29d494 100644 --- a/reframe/schemas/config.json +++ b/reframe/schemas/config.json @@ -589,7 +589,7 @@ "general/module_mappings": [], "general/non_default_craype": false, "general/perf_info_level": "info", - "general/perf_report_spec": "now:now/last:/+job_nodelist+result", + "general/perf_report_spec": "now-1d:now/last:/+job_nodelist+result", "general/pipeline_timeout": 3, "general/purge_environment": false, "general/remote_detect": false, @@ -634,7 +634,7 @@ "logging/handlers_perflog/httpjson_debug": false, "modes/options": [], "modes/target_systems": ["*"], - "storage/enable": true, + "storage/enable": false, "storage/backend": "sqlite", "storage/sqlite_conn_timeout": 60, "storage/sqlite_db_file": "${HOME}/.reframe/reports/results.db", diff --git a/unittests/test_cli.py b/unittests/test_cli.py index ef4f374be4..2879966dc2 100644 --- a/unittests/test_cli.py +++ b/unittests/test_cli.py @@ -420,7 +420,14 @@ def test_perflogdir_from_env(run_reframe, tmp_path, monkeypatch): 'default' / 'PerformanceFailureCheck.log') -def test_performance_report(run_reframe, run_action): +@pytest.fixture(params=['storage=yes', 'storage=no']) +def storage_enabled(request, monkeypatch): + value = request.param.split('=')[1] + monkeypatch.setenv('RFM_ENABLE_RESULTS_STORAGE', value) + return value == 'yes' + + +def test_performance_report(run_reframe, run_action, storage_enabled): returncode, stdout, stderr = run_reframe( checkpath=['unittests/resources/checks/frontend_checks.py'], more_options=['-n', '^PerformanceFailureCheck', @@ -433,6 +440,9 @@ def test_performance_report(run_reframe, run_action): else: assert returncode == 0 + if run_action != 'dry_run': + assert 'PERFORMANCE REPORT' in stdout + assert 'Traceback' not in stdout assert 'Traceback' not in stderr @@ -1269,7 +1279,8 @@ def assert_no_crash(returncode, stdout, stderr, exitcode=0): return returncode, stdout, stderr -def test_storage_options(run_reframe, tmp_path, table_format): +def test_storage_options(run_reframe, tmp_path, table_format, monkeypatch): + monkeypatch.setenv('RFM_ENABLE_RESULTS_STORAGE', 'yes') run_reframe2 = functools.partial( run_reframe, checkpath=['unittests/resources/checks/frontend_checks.py'], @@ -1335,8 +1346,7 @@ def test_storage_options(run_reframe, tmp_path, table_format): '--describe-stored-testcases=now-1d:now', '--list-stored-sessions', '--list-stored-testcases=now-1d:now/mean:/', - '--performance-compare=now-1d:now/now-1d/mean:/', - '--performance-report=now-1d:now/mean:/' + '--performance-compare=now-1d:now/now-1d/mean:/' ]) def storage_option(request): return request.param @@ -1359,7 +1369,8 @@ def test_disabled_results_storage(run_reframe, storage_option, monkeypatch): assert 'requires results storage' in stdout -def test_session_annotations(run_reframe): +def test_session_annotations(run_reframe, monkeypatch): + monkeypatch.setenv('RFM_ENABLE_RESULTS_STORAGE', 'yes') assert_no_crash(*run_reframe( checkpath=['unittests/resources/checks/frontend_checks.py'], action='-r', @@ -1373,13 +1384,14 @@ def test_session_annotations(run_reframe): assert text in stdout -def test_performance_compare(run_reframe, table_format): +def test_performance_compare(run_reframe, table_format, monkeypatch): def assert_no_crash(returncode, stdout, stderr, exitcode=0): assert returncode == exitcode assert 'Traceback' not in stdout assert 'Traceback' not in stderr return returncode, stdout, stderr + monkeypatch.setenv('RFM_ENABLE_RESULTS_STORAGE', 'yes') run_reframe2 = functools.partial( run_reframe, checkpath=['unittests/resources/checks/frontend_checks.py'], From a2949586083647e193e9c77847b795f8a53b40ba Mon Sep 17 00:00:00 2001 From: Vasileios Karakasis Date: Wed, 13 Nov 2024 13:59:19 +0100 Subject: [PATCH 5/5] Do not use file locks when saving the report file --- reframe/frontend/reporting/__init__.py | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/reframe/frontend/reporting/__init__.py b/reframe/frontend/reporting/__init__.py index c37c152610..48ae1a0d1f 100644 --- a/reframe/frontend/reporting/__init__.py +++ b/reframe/frontend/reporting/__init__.py @@ -17,7 +17,6 @@ import uuid from collections import UserDict from collections.abc import Hashable -from filelock import FileLock import reframe as rfm import reframe.utility.jsonext as jsonext @@ -419,7 +418,11 @@ def update_run_stats(self, stats): 'num_skipped': self.__report['runs'][-1]['num_skipped'] }) - def _save(self, filename, compress, link_to_last): + def is_empty(self): + '''Return :obj:`True` is no test cases where run''' + return self.__report['session_info']['num_cases'] == 0 + + def save(self, filename, compress=False, link_to_last=True): filename = _expand_report_filename(filename, newfile=True) with open(filename, 'w') as fp: if compress: @@ -446,15 +449,6 @@ def _save(self, filename, compress, link_to_last): else: raise ReframeError('path exists and is not a symlink') - def is_empty(self): - '''Return :obj:`True` is no test cases where run''' - return self.__report['session_info']['num_cases'] == 0 - - def save(self, filename, compress=False, link_to_last=True): - prefix = os.path.dirname(filename) or '.' - with FileLock(os.path.join(prefix, '.report.lock')): - self._save(filename, compress, link_to_last) - def store(self): '''Store the report in the results storage.'''