From 7bfce73be02d26e5845ed084b1529e40d875ce16 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Sat, 3 Feb 2024 03:55:49 +0530 Subject: [PATCH 01/15] Prevent error in rclone detect --- cm-mlops/script/get-rclone/run.sh | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/cm-mlops/script/get-rclone/run.sh b/cm-mlops/script/get-rclone/run.sh index adacfff712..49e07a6927 100644 --- a/cm-mlops/script/get-rclone/run.sh +++ b/cm-mlops/script/get-rclone/run.sh @@ -1,3 +1,7 @@ #!/bin/bash +if ! command -v rclone &> /dev/null +then + exit 1 +fi rclone --version > tmp-ver.out test $? -eq 0 || exit 1 From 9e44a4b8219ac18fa20c720d28a36222b7b8b17c Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Sat, 3 Feb 2024 03:56:46 +0530 Subject: [PATCH 02/15] Prevent error in rclone detect --- cm-mlops/script/get-ml-model-stable-diffusion/_cm.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cm-mlops/script/get-ml-model-stable-diffusion/_cm.json b/cm-mlops/script/get-ml-model-stable-diffusion/_cm.json index 0944739b72..f7b2bb66dd 100644 --- a/cm-mlops/script/get-ml-model-stable-diffusion/_cm.json +++ b/cm-mlops/script/get-ml-model-stable-diffusion/_cm.json @@ -155,7 +155,7 @@ "rclone": { "group": "download-tool", "env": { - "CM_RCLONE_CONFIG": "rclone config create mlc-inference s3 provider=LyveCloud access_key_id=0LITLNQMHZALM5AK secret_access_key=YQKYTMBY23TMZHLOYFJKL5CHHS0CWYUC endpoint=s3.us-east-1.lyvecloud.seagate.com", + "CM_RCLONE_CONFIG_CMD": "rclone config create mlc-inference s3 provider=LyveCloud access_key_id=0LITLNQMHZALM5AK secret_access_key=YQKYTMBY23TMZHLOYFJKL5CHHS0CWYUC endpoint=s3.us-east-1.lyvecloud.seagate.com", "CM_DOWNLOAD_TOOL": "rclone" }, "adr": { From caaf7cf22ccefeefdbc6a1da43757241c71a329d Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Sat, 3 Feb 2024 03:58:56 +0530 Subject: [PATCH 03/15] Prevent error in rclone detect --- cm-mlops/script/get-ml-model-gptj/_cm.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/cm-mlops/script/get-ml-model-gptj/_cm.json b/cm-mlops/script/get-ml-model-gptj/_cm.json index acf405c5e1..b340f04b83 100644 --- a/cm-mlops/script/get-ml-model-gptj/_cm.json +++ b/cm-mlops/script/get-ml-model-gptj/_cm.json @@ -85,7 +85,7 @@ "CM_UNZIP": "yes", "CM_DOWNLOAD_CHECKSUM_NOT_USED": "e677e28aaf03da84584bb3073b7ee315", "CM_PACKAGE_URL": "https://cloud.mlcommons.org/index.php/s/QAZ2oM94MkFtbQx/download", - "CM_RCLONE_CONFIG": "rclone config create mlc-inference s3 provider=LyveCloud access_key_id=0LITLNQMHZALM5AK secret_access_key=YQKYTMBY23TMZHLOYFJKL5CHHS0CWYUC endpoint=s3.us-east-1.lyvecloud.seagate.com", + "CM_RCLONE_CONFIG_CMD": "rclone config create mlc-inference s3 provider=LyveCloud access_key_id=0LITLNQMHZALM5AK secret_access_key=YQKYTMBY23TMZHLOYFJKL5CHHS0CWYUC endpoint=s3.us-east-1.lyvecloud.seagate.com", "CM_RCLONE_URL": "mlc-inference:mlcommons-inference-wg-s3/gpt-j" }, "add_deps_recursive": { From 357edcdd6b8724648f0addecbaf3842d7b67b904 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Sat, 3 Feb 2024 04:09:14 +0530 Subject: [PATCH 04/15] Prevent error in rclone detect --- cm-mlops/script/download-file/customize.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cm-mlops/script/download-file/customize.py b/cm-mlops/script/download-file/customize.py index 9e6812422d..04b4606d6c 100644 --- a/cm-mlops/script/download-file/customize.py +++ b/cm-mlops/script/download-file/customize.py @@ -109,7 +109,7 @@ def preprocess(i): elif tool == "rclone": if env.get('CM_RCLONE_CONFIG_CMD', '') != '': env['CM_DOWNLOAD_CONFIG_CMD'] = env['CM_RCLONE_CONFIG_CMD'] - env['CM_DOWNLOAD_CMD'] = f"rclone copy {url} ./{env['CM_DOWNLOAD_FILENAME']} -P" + env['CM_DOWNLOAD_CMD'] = f"rclone copy {url} {os.path.join(os.getcwd(), os.env['CM_DOWNLOAD_FILENAME'])} -P" filename = env['CM_DOWNLOAD_FILENAME'] env['CM_DOWNLOAD_DOWNLOADED_FILENAME'] = filename From e5619d20d8931be9d69f6a55f5e30d28aef7744d Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Sat, 3 
Feb 2024 04:10:47 +0530 Subject: [PATCH 05/15] Prevent error in rclone detect --- cm-mlops/script/download-file/customize.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cm-mlops/script/download-file/customize.py b/cm-mlops/script/download-file/customize.py index 04b4606d6c..3b76de9775 100644 --- a/cm-mlops/script/download-file/customize.py +++ b/cm-mlops/script/download-file/customize.py @@ -109,7 +109,7 @@ def preprocess(i): elif tool == "rclone": if env.get('CM_RCLONE_CONFIG_CMD', '') != '': env['CM_DOWNLOAD_CONFIG_CMD'] = env['CM_RCLONE_CONFIG_CMD'] - env['CM_DOWNLOAD_CMD'] = f"rclone copy {url} {os.path.join(os.getcwd(), os.env['CM_DOWNLOAD_FILENAME'])} -P" + env['CM_DOWNLOAD_CMD'] = f"rclone copy {url} {os.path.join(os.getcwd(), env['CM_DOWNLOAD_FILENAME'])} -P" filename = env['CM_DOWNLOAD_FILENAME'] env['CM_DOWNLOAD_DOWNLOADED_FILENAME'] = filename From 31d64f365fc226ab9b9d9f44c020c2b6c97345c2 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Sat, 3 Feb 2024 04:18:10 +0530 Subject: [PATCH 06/15] Prevent error in rclone detect --- cm-mlops/script/get-ml-model-gptj/_cm.json | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/cm-mlops/script/get-ml-model-gptj/_cm.json b/cm-mlops/script/get-ml-model-gptj/_cm.json index b340f04b83..1c5b7e768b 100644 --- a/cm-mlops/script/get-ml-model-gptj/_cm.json +++ b/cm-mlops/script/get-ml-model-gptj/_cm.json @@ -19,6 +19,7 @@ "prehook_deps": [ { "env": { + "CM_DOWNLOAD_FINAL_ENV_NAME": "GPTJ_CHECKPOINT_PATH", "CM_EXTRACT_FINAL_ENV_NAME": "GPTJ_CHECKPOINT_PATH", "CM_EXTRACT_TO_FOLDER": "gpt-j" }, @@ -87,7 +88,9 @@ "CM_PACKAGE_URL": "https://cloud.mlcommons.org/index.php/s/QAZ2oM94MkFtbQx/download", "CM_RCLONE_CONFIG_CMD": "rclone config create mlc-inference s3 provider=LyveCloud access_key_id=0LITLNQMHZALM5AK secret_access_key=YQKYTMBY23TMZHLOYFJKL5CHHS0CWYUC endpoint=s3.us-east-1.lyvecloud.seagate.com", "CM_RCLONE_URL": "mlc-inference:mlcommons-inference-wg-s3/gpt-j" - }, + } + }, + 
"pytorch,fp32,wget": { "add_deps_recursive": { "dae": { "tags": "_extract" From 05530cf130246ab917243c2998498e7087cf8046 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Sat, 3 Feb 2024 04:51:53 +0530 Subject: [PATCH 07/15] Fix gptj download name with rclone --- cm-mlops/script/get-ml-model-gptj/_cm.json | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/cm-mlops/script/get-ml-model-gptj/_cm.json b/cm-mlops/script/get-ml-model-gptj/_cm.json index 1c5b7e768b..5df60ee2b0 100644 --- a/cm-mlops/script/get-ml-model-gptj/_cm.json +++ b/cm-mlops/script/get-ml-model-gptj/_cm.json @@ -82,7 +82,6 @@ "pytorch,fp32": { "env": { "CM_DOWNLOAD_EXTRA_OPTIONS": " --output-document checkpoint.zip", - "CM_DOWNLOAD_FILENAME": "checkpoint.zip", "CM_UNZIP": "yes", "CM_DOWNLOAD_CHECKSUM_NOT_USED": "e677e28aaf03da84584bb3073b7ee315", "CM_PACKAGE_URL": "https://cloud.mlcommons.org/index.php/s/QAZ2oM94MkFtbQx/download", @@ -189,7 +188,8 @@ } }, "env": { - "CM_DOWNLOAD_URL": "<<>>" + "CM_DOWNLOAD_URL": "<<>>", + "CM_DOWNLOAD_FILENAME": "checkpoint.zip" } }, "rclone": { @@ -200,6 +200,7 @@ } }, "env": { + "CM_DOWNLOAD_FILENAME": "checkpoint", "CM_DOWNLOAD_URL": "<<>>" } }, From 2ffc7d60c97b7aee9eafb1c2d64ae036aa5d9e9b Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Sat, 3 Feb 2024 05:04:01 +0530 Subject: [PATCH 08/15] Support run_deps for docker (WIP) --- cm-mlops/automation/script/module.py | 2 +- cm-mlops/automation/script/module_misc.py | 15 ++++++++++++--- .../build-mlperf-inference-server-nvidia/_cm.yaml | 2 ++ 3 files changed, 15 insertions(+), 4 deletions(-) diff --git a/cm-mlops/automation/script/module.py b/cm-mlops/automation/script/module.py index 2ae9222d9e..63df342f0b 100644 --- a/cm-mlops/automation/script/module.py +++ b/cm-mlops/automation/script/module.py @@ -2525,7 +2525,7 @@ def _call_run_deps(script, deps, local_env_keys, local_env_keys_from_meta, env, ############################################################################## def _run_deps(self, 
deps, clean_env_keys_deps, env, state, const, const_state, add_deps_recursive, recursion_spaces, remembered_selections, variation_tags_string='', from_cache=False, debug_script_tags='', - verbose=False, show_time=False, extra_recursion_spaces=' ', run_state={'deps':[], 'fake_deps':[]}): + verbose=False, show_time=False, extra_recursion_spaces=' ', run_state={'deps':[], 'fake_deps':[], 'parent': None}): """ Runs all the enabled dependencies and pass them env minus local env """ diff --git a/cm-mlops/automation/script/module_misc.py b/cm-mlops/automation/script/module_misc.py index e345fe2e21..1394196560 100644 --- a/cm-mlops/automation/script/module_misc.py +++ b/cm-mlops/automation/script/module_misc.py @@ -1537,7 +1537,7 @@ def docker(i): return {'return':1, 'error':'no scripts were found'} env=i.get('env', {}) - env['CM_RUN_STATE_DOCKER'] = True + env['CM_RUN_STATE_DOCKER'] = False docker_cache = i.get('docker_cache', "yes") if docker_cache in ["no", False, "False" ]: @@ -1599,8 +1599,16 @@ def docker(i): update_path_for_docker('.', mounts, force_path_target=current_path_target) - _os = i.get('docker_os', meta.get('docker_os', 'ubuntu')) - version = i.get('docker_os_version', meta.get('docker_os_version', '22.04')) + _os = i.get('docker_os', docker_settings.get('docker_os', 'ubuntu')) + version = i.get('docker_os_version', docker_settings.get('docker_os_version', '22.04')) + + deps = docker_settings.get('deps', []) + if deps: + # Todo: Support state, const and add_deps_recursive + script_automation = i['self_module'] + r = script_automation._run_deps(deps, [], env, {}, {}, {}, {}, '',{}) + if r['return'] > 0: + return r for key in docker_settings.get('mounts', []): mounts.append(key) @@ -1722,6 +1730,7 @@ def docker(i): if r['return']>0: return r run_cmd = r['run_cmd_string'] + env['CM_RUN_STATE_DOCKER'] = True if docker_settings.get('mount_current_dir','')=='yes': run_cmd = 'cd '+current_path_target+' && '+run_cmd diff --git 
a/cm-mlops/script/build-mlperf-inference-server-nvidia/_cm.yaml b/cm-mlops/script/build-mlperf-inference-server-nvidia/_cm.yaml index bd485784a7..2a618e9ba3 100644 --- a/cm-mlops/script/build-mlperf-inference-server-nvidia/_cm.yaml +++ b/cm-mlops/script/build-mlperf-inference-server-nvidia/_cm.yaml @@ -228,6 +228,8 @@ docker: tensorrt_tar_file_path: CM_TENSORRT_TAR_FILE_PATH cuda_run_file_path: CUDA_RUN_FILE_LOCAL_PATH scratch_path: MLPERF_SCRATCH_PATH + deps: + - tags: get,mlperf,inference,nvidia,scratch,space mounts: - "${{ IMAGENET_PATH }}:/data/imagenet-val" - "${{ RESULTS_DIR }}:/home/cmuser/results_dir" From cb47a3e4035eed822d7cc6e954d01172cc8a8c48 Mon Sep 17 00:00:00 2001 From: Grigori Fursin Date: Sat, 3 Feb 2024 10:21:29 +0100 Subject: [PATCH 09/15] a few clean ups (including YAML for main MLPerf inference automation) --- README.md | 2 +- .../run-mlperf-inference-app/README-about.md | 15 +- .../{_cm.json => _cm.json_not_used} | 0 .../script/run-mlperf-inference-app/_cm.yaml | 377 ++++++++++++++++++ cm/cmind/core.py | 14 +- cm/setup.py | 6 +- docs/_generator/generate_toc.cmd | 1 + 7 files changed, 394 insertions(+), 21 deletions(-) rename cm-mlops/script/run-mlperf-inference-app/{_cm.json => _cm.json_not_used} (100%) create mode 100644 cm-mlops/script/run-mlperf-inference-app/_cm.yaml diff --git a/README.md b/README.md index ecab20bd54..f6e6ff68bb 100755 --- a/README.md +++ b/README.md @@ -43,7 +43,7 @@ Some key requirements for the CM design are: with CM recipes for repeatability and reproducibility. 
Below you can find a few examples of this collaborative engineering effort sponsored -by [MLCommons (non-profit organization with 125+ organizations)](https://mlcommons.org) - +by [MLCommons (non-profit organization with 125+ members)](https://mlcommons.org) - a few most-commonly used [automation recipes](https://github.com/mlcommons/ck/tree/master/docs/list_of_scripts.md) that can be chained into more complex automation workflows [using simple JSON or YAML](https://github.com/mlcommons/ck/blob/master/cm-mlops/script/app-image-classification-onnx-py/_cm.yaml). diff --git a/cm-mlops/script/run-mlperf-inference-app/README-about.md b/cm-mlops/script/run-mlperf-inference-app/README-about.md index 546b9c56d9..5cdc7bc085 100644 --- a/cm-mlops/script/run-mlperf-inference-app/README-about.md +++ b/cm-mlops/script/run-mlperf-inference-app/README-about.md @@ -1,13 +1,6 @@ -This portable CM (CK2) script provides a unified and portable interface to the MLPerf inference benchmark -modularized by other [portable CM scripts](https://github.com/mlcommons/ck/blob/master/docs/list_of_scripts.md) -being developed by the open [MLCommons taskforce on automation and reproducibility](https://github.com/mlcommons/ck/blob/master/docs/mlperf-education-workgroup.md). +This is a ready-to-use CM automation recipe that provides a unified and portable interface to the MLPerf inference benchmark +assembled from other [portable CM scripts](https://github.com/mlcommons/ck/blob/master/docs/list_of_scripts.md) +being developed by the open [MLCommons taskforce on automation and reproducibility](https://github.com/mlcommons/ck/blob/master/docs/taskforce.md). 
-It is a higher-level wrapper that automatically generates the command line for the [universal MLPerf inference script](../app-mlperf-inference) +This automation recipe automatically generates the command line for the [universal MLPerf inference script](../app-mlperf-inference) to run MLPerf scenarios for a given ML task, model, runtime and device, and prepare and validate submissions. - -Check these [tutorials](https://github.com/mlcommons/ck/blob/master/docs/tutorials/sc22-scc-mlperf.md) from the Student Cluster Competition -at Supercomputing'22 to understand how to use this script to run the MLPerf inference benchmark and automate submissions. - -See the development roadmap [here](https://github.com/mlcommons/ck/issues/536). - -See extension projects to enable collaborative benchmarking, design space exploration and optimization of ML and AI Systems [here](https://github.com/mlcommons/ck/issues/627). diff --git a/cm-mlops/script/run-mlperf-inference-app/_cm.json b/cm-mlops/script/run-mlperf-inference-app/_cm.json_not_used similarity index 100% rename from cm-mlops/script/run-mlperf-inference-app/_cm.json rename to cm-mlops/script/run-mlperf-inference-app/_cm.json_not_used diff --git a/cm-mlops/script/run-mlperf-inference-app/_cm.yaml b/cm-mlops/script/run-mlperf-inference-app/_cm.yaml new file mode 100644 index 0000000000..6f12b26cd4 --- /dev/null +++ b/cm-mlops/script/run-mlperf-inference-app/_cm.yaml @@ -0,0 +1,377 @@ +alias: run-mlperf-inference-app +uid: 4a5d5b13fd7e4ac8 + +automation_alias: script +automation_uid: 5b4e0237da074764 + +category: Modular MLPerf inference benchmark pipeline +gui_title: CM GUI to run the MLPerf inference benchmark and prepare submissions + +clean_output_files: +- open.tar.gz +- summary.csv +- summary.json + +tags: +- run +- common +- generate-run-cmds +- run-mlperf +- vision +- mlcommons +- mlperf +- inference +- reference + +default_env: + CM_MLPERF_IMPLEMENTATION: reference + CM_MLPERF_MODEL: resnet50 + CM_MLPERF_RUN_STYLE: 
test + CM_OUTPUT_FOLDER_NAME: test_results + +input_mapping: + backend: CM_MLPERF_BACKEND + category: CM_MLPERF_SUBMISSION_SYSTEM_TYPE + clean: CM_MLPERF_CLEAN_ALL + compliance: CM_MLPERF_LOADGEN_COMPLIANCE + dashboard_wb_project: CM_MLPERF_DASHBOARD_WANDB_PROJECT + dashboard_wb_user: CM_MLPERF_DASHBOARD_WANDB_USER + debug: CM_DEBUG_SCRIPT_BENCHMARK_PROGRAM + device: CM_MLPERF_DEVICE + division: CM_MLPERF_SUBMISSION_DIVISION + execution_mode: CM_MLPERF_EXECUTION_MODE + find_performance: CM_MLPERF_FIND_PERFORMANCE_MODE + gpu_name: CM_NVIDIA_GPU_NAME + hw_name: CM_HW_NAME + hw_notes_extra: CM_MLPERF_SUT_SW_NOTES_EXTRA + imagenet_path: IMAGENET_PATH + implementation: CM_MLPERF_IMPLEMENTATION + lang: CM_MLPERF_IMPLEMENTATION + mode: CM_MLPERF_LOADGEN_MODE + model: CM_MLPERF_MODEL + multistream_target_latency: CM_MLPERF_LOADGEN_MULTISTREAM_TARGET_LATENCY + network: CM_NETWORK_LOADGEN + offline_target_qps: CM_MLPERF_LOADGEN_OFFLINE_TARGET_QPS + output_dir: OUTPUT_BASE_DIR + output_summary: MLPERF_INFERENCE_SUBMISSION_SUMMARY + output_tar: MLPERF_INFERENCE_SUBMISSION_TAR_FILE + power: CM_SYSTEM_POWER + precision: CM_MLPERF_MODEL_PRECISION + preprocess_submission: CM_RUN_MLPERF_SUBMISSION_PREPROCESSOR + push_to_github: CM_MLPERF_RESULT_PUSH_TO_GITHUB + readme: CM_MLPERF_README + regenerate_accuracy_file: CM_MLPERF_REGENERATE_ACCURACY_FILE + regenerate_files: CM_REGENERATE_MEASURE_FILES + rerun: CM_RERUN + results_dir: OUTPUT_BASE_DIR + results_git_url: CM_MLPERF_RESULTS_GIT_REPO_URL + run_checker: CM_RUN_SUBMISSION_CHECKER + run_style: CM_MLPERF_EXECUTION_MODE + scenario: CM_MLPERF_LOADGEN_SCENARIO + server_target_qps: CM_MLPERF_LOADGEN_SERVER_TARGET_QPS + singlestream_target_latency: CM_MLPERF_LOADGEN_SINGLESTREAM_TARGET_LATENCY + skip_submission_generation: CM_MLPERF_SKIP_SUBMISSION_GENERATION + skip_truncation: CM_SKIP_TRUNCATE_ACCURACY + submission_dir: CM_MLPERF_SUBMISSION_DIR + submitter: CM_MLPERF_SUBMITTER + sut_servers: CM_NETWORK_LOADGEN_SUT_SERVERS + 
sw_notes_extra: CM_MLPERF_SUT_SW_NOTES_EXTRA + system_type: CM_MLPERF_SUBMISSION_SYSTEM_TYPE + target_latency: CM_MLPERF_LOADGEN_TARGET_LATENCY + target_qps: CM_MLPERF_LOADGEN_TARGET_QPS + test_query_count: CM_TEST_QUERY_COUNT + +new_state_keys: +- app_mlperf_inference_* + +deps: +- tags: detect,os +- tags: detect,cpu +- names: + - python + - python3 + tags: get,python3 +- names: + - inference-src + tags: get,mlcommons,inference,src +- tags: get,sut,description + +docker: + fake_run_deps: true + mounts: + - ${{ GPTJ_CHECKPOINT_PATH }}:${{ GPTJ_CHECKPOINT_PATH }} + - ${{ INSTALL_DATA_PATH }}:/install_data + - ${{ DATA_PATH }}:/data + run: true + +variations: + + accuracy-only: + default_variations: + submission-generation-style: full + env: + CM_MLPERF_LOADGEN_MODE: accuracy + CM_MLPERF_SUBMISSION_RUN: 'yes' + CM_RUN_MLPERF_ACCURACY: 'on' + CM_RUN_SUBMISSION_CHECKER: 'no' + group: submission-generation + + all-modes: + env: + CM_MLPERF_LOADGEN_ALL_MODES: 'yes' + group: mode + + all-scenarios: + env: + CM_MLPERF_LOADGEN_ALL_SCENARIOS: 'yes' + + compliance: + env: + CM_MLPERF_LOADGEN_COMPLIANCE: 'yes' + + dashboard: + default_gui: true + env: + CM_MLPERF_DASHBOARD: 'on' + + find-performance: + default: true + env: + CM_MLPERF_FIND_PERFORMANCE_MODE: 'yes' + CM_MLPERF_LOADGEN_ALL_MODES: 'no' + CM_MLPERF_LOADGEN_MODE: performance + CM_MLPERF_RESULT_PUSH_TO_GITHUB: false + group: submission-generation + + full: + add_deps_recursive: + coco2014-original: + tags: _full + coco2014-preprocessed: + tags: _full + imagenet-original: + tags: _full + imagenet-preprocessed: + tags: _full + openimages-original: + tags: _full + openimages-preprocessed: + tags: _full + openorca-original: + tags: _full + openorca-preprocessed: + tags: _full + env: + CM_MLPERF_SUBMISSION_GENERATION_STYLE: full + group: submission-generation-style + + performance-only: + default_variations: + submission-generation-style: full + env: + CM_MLPERF_LOADGEN_MODE: performance + CM_MLPERF_SUBMISSION_RUN: 'yes' 
+ CM_RUN_SUBMISSION_CHECKER: 'no' + group: submission-generation + + populate-readme: + base: + - all-modes + default_variations: + submission-generation-style: full + env: + CM_MLPERF_README: 'yes' + CM_MLPERF_SUBMISSION_RUN: 'yes' + CM_RUN_SUBMISSION_CHECKER: 'no' + group: submission-generation + + r2.1: + env: + CM_MLPERF_INFERENCE_VERSION: '2.1' + CM_RUN_MLPERF_INFERENCE_APP_DEFAULTS: r2.1_default + group: reproducibility + + r3.0: + env: + CM_MLPERF_INFERENCE_VERSION: '3.0' + CM_RUN_MLPERF_INFERENCE_APP_DEFAULTS: r3.0_default + group: reproducibility + + r3.1: + env: + CM_MLPERF_INFERENCE_VERSION: '3.1' + CM_RUN_MLPERF_INFERENCE_APP_DEFAULTS: r3.1_default + group: reproducibility + + r4.0: + default: true + env: + CM_MLPERF_INFERENCE_VERSION: '4.0' + CM_RUN_MLPERF_INFERENCE_APP_DEFAULTS: r4.0_default + group: reproducibility + + short: + add_deps_recursive: + submission-checker: + tags: _short-run + default: 'true' + env: + CM_MLPERF_SUBMISSION_GENERATION_STYLE: short + group: submission-generation-style + + submission: + base: + - all-modes + default_gui: true + default_variations: + submission-generation-style: full + env: + CM_MLPERF_LOADGEN_COMPLIANCE: 'yes' + CM_MLPERF_SUBMISSION_RUN: 'yes' + CM_RUN_MLPERF_ACCURACY: 'on' + CM_RUN_SUBMISSION_CHECKER: 'yes' + CM_TAR_SUBMISSION_DIR: 'yes' + group: submission-generation + post_deps: + - names: + - submission-generator + skip_if_env: + CM_MLPERF_SKIP_SUBMISSION_GENERATION: + - 'yes' + - 'True' + tags: generate,mlperf,inference,submission + +versions: + master: {} + r2.1: {} + +input_description: + adr.compiler.tags: + default: gcc + desc: Compiler for loadgen and any C/C++ part of implementation + adr.inference-src-loadgen.env.CM_GIT_URL: + default: '' + desc: Git URL for MLPerf inference sources to build LoadGen (to enable non-reference + implementations) + adr.inference-src.env.CM_GIT_URL: + default: '' + desc: Git URL for MLPerf inference sources to run benchmarks (to enable non-reference + implementations) 
+ adr.mlperf-inference-implementation.max_batchsize: + desc: Maximum batchsize to be used + adr.mlperf-inference-implementation.num_threads: + desc: Number of threads (reference&C++ implementation only) + adr.python.name: + default: mlperf + desc: Python virtual environment name (optional) + adr.python.version: + desc: Force Python version (must have all system deps) + adr.python.version_min: + default: '3.8' + desc: Minimal Python version + backend: + choices: + - onnxruntime + - tf + - pytorch + - deepsparse + - tensorrt + - tvm-onnx + default: onnxruntime + desc: MLPerf backend + clean: + boolean: true + default: true + desc: Clean run + compliance: + choices: + - 'yes' + - 'no' + default: 'yes' + desc: Whether to run compliance tests (applicable only for closed division) + dashboard_wb_project: + default: cm-mlperf-dse-testing + desc: W&B dashboard project + dashboard_wb_user: + default: cmind + desc: W&B dashboard user + device: + choices: + - cpu + - cuda + default: cpu + desc: MLPerf device + execution_mode: + choices: + - test + - fast + - valid + default: test + desc: Execution mode + hw_name: + default: default + desc: MLPerf hardware name (from [here](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/get-mlperf-inference-sut-description/hardware)) + implementation: + choices: + - reference + - cpp + - nvidia-original + - tflite-cpp + default: reference + desc: MLPerf implementation + mode: + choices: + - '' + - accuracy + - performance + default: '' + desc: MLPerf mode + model: + choices: + - resnet50 + - retinanet + - bert-99 + - bert-99.9 + - 3d-unet + - rnnt + default: resnet50 + desc: MLPerf model + multistream_target_latency: + desc: Set MultiStream target latency + offline_target_qps: + desc: Set LoadGen Offline target QPS + precision: + choices: + - fp32 + - int8 + default: '' + desc: MLPerf model precision + quiet: + boolean: true + default: false + desc: Quiet run (select default values for all questions) + results_dir: + desc: Folder 
path where run results should be stored (defaults to the current + working directory) + scenario: + choices: + - Offline + - Server + - SingleStream + - MultiStream + default: Offline + desc: MLPerf scenario + server_target_qps: + desc: Set Server target QPS + singlestream_target_latency: + desc: Set SingleStream target latency + submission_dir: + desc: Folder path where submission tree (to be submitted) must be stored + submitter: + default: TheCommunity + desc: Submitter name (without space) + target_latency: + desc: Set Target latency + target_qps: + desc: Set LoadGen target QPS + diff --git a/cm/cmind/core.py b/cm/cmind/core.py index 9b5c9f6f00..cbd2270e26 100644 --- a/cm/cmind/core.py +++ b/cm/cmind/core.py @@ -262,7 +262,7 @@ def access(self, i, out = None): print (self.cfg['info_cli']) if cm_help or extra_help: - print_db_actions(self.common_automation, self.cfg['action_substitutions']) + print_db_actions(self.common_automation, self.cfg['action_substitutions'], '') return {'return':0, 'warning':'no action specified'} @@ -512,7 +512,7 @@ def access(self, i, out = None): print ('') print ('Automation python module: {}'.format(automation_full_path)) - r = print_db_actions(self.common_automation, self.cfg['action_substitutions']) + r = print_db_actions(self.common_automation, self.cfg['action_substitutions'], automation_meta.get('alias','')) if r['return']>0: return r db_actions = r['db_actions'] @@ -528,7 +528,7 @@ def access(self, i, out = None): print ('') for d in actions: - print ('* '+d) + print (' * cm ' + d + ' ' + automation_meta.get('alias','')) return {'return':0, 'warning':'no automation action'} @@ -626,7 +626,7 @@ def parse_cm_object_and_check_current_dir(cmind, artifact): return utils.parse_cm_object(artifact) ############################################################ -def print_db_actions(automation, equivalent_actions): +def print_db_actions(automation, equivalent_actions, automation_name): """ Internal function: prints CM database actions. 
@@ -636,7 +636,7 @@ def print_db_actions(automation, equivalent_actions): import types print ('') - print ('Collective database actions:') + print ('Common actions to manage CM repositories:') print ('') db_actions=[] @@ -662,7 +662,9 @@ def print_db_actions(automation, equivalent_actions): if se!='': s+=' (' + se + ')' - print (' * ' + s) + x = ' ' + automation_name if automation_name!='' else '' + + print (' * cm ' + s + x) return {'return':0, 'db_actions':db_actions} diff --git a/cm/setup.py b/cm/setup.py index 329f8822be..f3a584f7f4 100644 --- a/cm/setup.py +++ b/cm/setup.py @@ -77,10 +77,10 @@ def run(self): license="Apache 2.0", - long_description=open(convert_path('./README.md'), encoding="utf-8").read(), + long_description=open('README.md', encoding="utf-8").read(), long_description_content_type="text/markdown", - url="https://github.com/mlcommons/ck/tree/master/cm", + url="https://github.com/mlcommons/ck", python_requires="", # do not force for testing @@ -106,5 +106,5 @@ def run(self): zip_safe=False, - keywords="collective mind,cmind,ck2,cdatabase,cmeta,automation,portability,reusability,meta,JSON,YAML,python,api,cli" + keywords="collective mind,cmind,ck3,cdatabase,cmeta,automation,portability,reusability,productivity,meta,JSON,YAML,python,api,cli" ) diff --git a/docs/_generator/generate_toc.cmd b/docs/_generator/generate_toc.cmd index 38b35b6a73..e720d9afb9 100644 --- a/docs/_generator/generate_toc.cmd +++ b/docs/_generator/generate_toc.cmd @@ -21,6 +21,7 @@ cm create-toc-from-md utils --input=taskforce.md cm create-toc-from-md utils --input=installation.md cm create-toc-from-md utils --input=faq.md cm create-toc-from-md utils --input=README.md +cm create-toc-from-md utils --input=getting-started.md cd mlperf/inference From d707a8c8eea29cbd70d6513f84d5e94925a37840 Mon Sep 17 00:00:00 2001 From: Grigori Fursin Date: Sat, 3 Feb 2024 11:23:17 +0100 Subject: [PATCH 10/15] added possibility to add extra tags from ENV: "extra_cache_tags": 
"ml-model,resnet50,raw,ml-model-resnet50,_<<>>", Needed when we download files via CM script to recreate original variations ... --- cm-mlops/automation/script/module.py | 34 +- .../script/get-ml-model-resnet50/_cm.json | 313 +++++++++--------- 2 files changed, 180 insertions(+), 167 deletions(-) diff --git a/cm-mlops/automation/script/module.py b/cm-mlops/automation/script/module.py index 63df342f0b..46dd3e6978 100644 --- a/cm-mlops/automation/script/module.py +++ b/cm-mlops/automation/script/module.py @@ -368,17 +368,6 @@ def run(self, i): if value != '': env['CM_' + key.upper()] = value - # Check extra cache tags - x = env.get('CM_EXTRA_CACHE_TAGS','').strip() - extra_cache_tags = [] if x=='' else x.split(',') - - if i.get('extra_cache_tags','')!='': - for x in i['extra_cache_tags'].strip().split(','): - if x!='' and x not in extra_cache_tags: - extra_cache_tags.append(x) - - if env.get('CM_NAME','')!='': - extra_cache_tags.append('name-'+env['CM_NAME'].strip().lower()) ############################################################################################################ @@ -972,6 +961,29 @@ def run(self, i): update_env_with_values(env) + + ############################################################################################################ + # Check extra cache tags + x = env.get('CM_EXTRA_CACHE_TAGS','').strip() + extra_cache_tags = [] if x=='' else x.split(',') + + if i.get('extra_cache_tags','')!='': + for x in i['extra_cache_tags'].strip().split(','): + if x!='': + if '<<<' in x: + import re + tmp_values = re.findall(r'<<<(.*?)>>>', str(x)) + for tmp_value in tmp_values: + xx = str(env.get(tmp_value,'')) + x = x.replace("<<<"+tmp_value+">>>", xx) + if x not in extra_cache_tags: + extra_cache_tags.append(x) + + if env.get('CM_NAME','')!='': + extra_cache_tags.append('name-'+env['CM_NAME'].strip().lower()) + + + ############################################################################################################ # Check if need to clean 
output files clean_output_files = meta.get('clean_output_files', []) diff --git a/cm-mlops/script/get-ml-model-resnet50/_cm.json b/cm-mlops/script/get-ml-model-resnet50/_cm.json index 1c3922c4b9..676208c3db 100644 --- a/cm-mlops/script/get-ml-model-resnet50/_cm.json +++ b/cm-mlops/script/get-ml-model-resnet50/_cm.json @@ -2,9 +2,10 @@ "alias": "get-ml-model-resnet50", "automation_alias": "script", "automation_uid": "5b4e0237da074764", - "category": "AI/ML models", "cache": true, + "category": "AI/ML models", "env": { + "CM_DOWNLOAD_FINAL_ENV_NAME": "CM_ML_MODEL_FILE_WITH_PATH", "CM_ML_MODEL": "RESNET50", "CM_ML_MODEL_DATASET": "imagenet2012-val", "CM_ML_MODEL_IMAGE_HEIGHT": "224", @@ -12,86 +13,117 @@ "CM_ML_MODEL_NORMALIZE_DATA": "0", "CM_ML_MODEL_RETRAINING": "no", "CM_ML_MODEL_SUBTRACT_MEANS": "YES", - "CM_ML_MODEL_WEIGHT_TRANSFORMATIONS": "no", - "CM_DOWNLOAD_FINAL_ENV_NAME": "CM_ML_MODEL_FILE_WITH_PATH" + "CM_ML_MODEL_WEIGHT_TRANSFORMATIONS": "no" }, "new_env_keys": [ "CM_ML_MODEL_*" ], - "tags": [ - "get", - "raw", - "ml-model", - "resnet50", - "ml-model-resnet50", - "image-classification" - ], - "uid": "56203e4e998b4bc0", "prehook_deps": [ { - "tags": "download-and-extract", "env": { "CM_EXTRACT_EXTRACTED_FILENAME": "<<>>" }, + "extra_cache_tags": "ml-model,resnet50,raw,ml-model-resnet50,_<<>>", "force_cache": true, - "extra_cache_tags": "ml-model,resnet50,raw,ml-model-resnet50", + "names": [ + "model-downloader" + ], + "tags": "download-and-extract", "update_tags_from_env_with_prefix": { "_url.": [ "CM_PACKAGE_URL" ] - }, - "names": [ - "model-downloader" - ] + } } ], + "tags": [ + "get", + "raw", + "ml-model", + "resnet50", + "ml-model-resnet50", + "image-classification" + ], + "uid": "56203e4e998b4bc0", "variations": { - "fp32": { - "group": "precision", + "argmax": { "default": true, "env": { - "CM_ML_MODEL_INPUT_DATA_TYPES": "fp32", - "CM_ML_MODEL_PRECISION": "fp32", - "CM_ML_MODEL_WEIGHT_DATA_TYPES": "fp32" - } + "CM_ML_MODEL_OUTPUT_LAYER_ARGMAX": 
"yes" + }, + "group": "model-output" }, - "int8": { - "group": "precision", + "batch_size.#": { "env": { - "CM_ML_MODEL_INPUT_DATA_TYPES": "int8", - "CM_ML_MODEL_PRECISION": "int8", - "CM_ML_MODEL_WEIGHT_DATA_TYPES": "int8" + "CM_ML_MODEL_BATCH_SIZE": "#" } }, - "opset-11": { - "group": "opset-version", + "batch_size.1": { "env": { - "CM_ML_MODEL_ONNX_OPSET": "11" + "CM_ML_MODEL_BATCH_SIZE": "1" } }, - "opset-8": { - "group": "opset-version", + "fix-input-shape": { + "deps": [ + { + "names": [ + "python", + "python3" + ], + "tags": "get,python3" + } + ] + }, + "fp32": { + "default": true, "env": { - "CM_ML_MODEL_ONNX_OPSET": "8" - } + "CM_ML_MODEL_INPUT_DATA_TYPES": "fp32", + "CM_ML_MODEL_PRECISION": "fp32", + "CM_ML_MODEL_WEIGHT_DATA_TYPES": "fp32" + }, + "group": "precision" }, + "from-tf": {}, "huggingface_default": { "env": { "CM_PACKAGE_URL": "https://huggingface.co/ctuning/mlperf-inference-resnet50-onnx-fp32-imagenet2012-v1.0/resolve/main/resnet50_v1.onnx" } }, - "onnx,opset-11": { + "int8": { "env": { - "CM_PACKAGE_URL": "https://zenodo.org/record/4735647/files/resnet50_v1.onnx" - } + "CM_ML_MODEL_INPUT_DATA_TYPES": "int8", + "CM_ML_MODEL_PRECISION": "int8", + "CM_ML_MODEL_WEIGHT_DATA_TYPES": "int8" + }, + "group": "precision" }, - "onnx,opset-8": { + "ncnn": { "env": { - "CM_PACKAGE_URL": "https://zenodo.org/record/2592612/files/resnet50_v1.onnx" - } + "CM_ML_MODEL_FRAMEWORK": "ncnn" + }, + "group": "framework" + }, + "ncnn,fp32": { + "env": { + "CM_PACKAGE_URL": "https://zenodo.org/record/8073420/files/resnet50_v1.bin?download=1" + }, + "post_deps": [ + { + "env": { + "CM_EXTRACT_EXTRACTED_FILENAME": "<<>>" + }, + "extra_cache_tags": "ml-model-params,params,resnet50,ncnn,model-params", + "tags": "download-and-extract,_url.https://zenodo.org/record/8073420/files/resnet50_v1.param?download=" + } + ] + }, + "no-argmax": { + "env": { + "CM_ML_MODEL_OUTPUT_LAYER_ARGMAX": "no" + }, + "group": "model-output" }, "onnx": { - "group": "framework", "default": true, 
"default_variations": { "opset-version": "opset-11" @@ -101,14 +133,13 @@ "CM_ML_MODEL_FRAMEWORK": "onnx", "CM_ML_MODEL_INPUT_LAYERS": "input_tensor:0", "CM_ML_MODEL_INPUT_LAYER_NAME": "input_tensor:0", - "CM_ML_MODEL_OUTPUT_LAYERS": "softmax_tensor:0", "CM_ML_MODEL_INPUT_SHAPES": "\\\"input_tensor:0\\\": (BATCH_SIZE, 3, 224, 224)", + "CM_ML_MODEL_OUTPUT_LAYERS": "softmax_tensor:0", "CM_ML_MODEL_OUTPUT_LAYER_NAME": "softmax_tensor:0", "CM_ML_MODEL_STARTING_WEIGHTS_FILENAME": "<<>>", "CM_ML_MODEL_VER": "1.5" - } - }, - "from-tf": { + }, + "group": "framework" }, "onnx,from-tf": { "env": { @@ -116,188 +147,158 @@ "CM_ML_MODEL_FRAMEWORK": "onnx", "CM_ML_MODEL_INPUT_LAYERS": "input_tensor", "CM_ML_MODEL_INPUT_LAYER_NAME": "input_tensor", - "CM_ML_MODEL_OUTPUT_LAYERS": "softmax_tensor", "CM_ML_MODEL_INPUT_SHAPES": "\\\"input_tensor\\\": (BATCH_SIZE, 224, 224, 3)", + "CM_ML_MODEL_OUTPUT_LAYERS": "softmax_tensor", "CM_ML_MODEL_OUTPUT_LAYER_NAME": "softmax_tensor", "CM_ML_MODEL_STARTING_WEIGHTS_FILENAME": "https://zenodo.org/record/2535873/files/resnet50_v1.pb" } }, "onnx,from-tf,fp32": { - "env": { - "CM_PACKAGE_URL": "https://drive.google.com/uc?id=15wZ_8Vt12cb10IEBsln8wksD1zGwlbOM", - "CM_DOWNLOAD_FILENAME": "resnet50_v1_modified.onnx" - }, "adr": { "model-downloader": { "tags": "_gdown" } + }, + "env": { + "CM_DOWNLOAD_FILENAME": "resnet50_v1_modified.onnx", + "CM_PACKAGE_URL": "https://drive.google.com/uc?id=15wZ_8Vt12cb10IEBsln8wksD1zGwlbOM" } }, - "onnxruntime": { - "alias": "onnx" - }, - "pytorch,fp32": { + "onnx,opset-11": { "env": { - "CM_PACKAGE_URL": "https://zenodo.org/record/4588417/files/resnet50-19c8e357.pth" + "CM_PACKAGE_URL": "https://zenodo.org/record/4735647/files/resnet50_v1.onnx" } }, - "ncnn": { - "group": "framework", + "onnx,opset-8": { "env": { - "CM_ML_MODEL_FRAMEWORK": "ncnn" + "CM_PACKAGE_URL": "https://zenodo.org/record/2592612/files/resnet50_v1.onnx" } }, - "ncnn,fp32": { + "onnxruntime": { + "alias": "onnx" + }, + "opset-11": { "env": { - 
"CM_PACKAGE_URL": "https://zenodo.org/record/8073420/files/resnet50_v1.bin?download=1" + "CM_ML_MODEL_ONNX_OPSET": "11" }, - "post_deps": [ - { - "tags": "download-and-extract,_url.https://zenodo.org/record/8073420/files/resnet50_v1.param?download=", - "env": { - "CM_EXTRACT_EXTRACTED_FILENAME": "<<>>" - }, - "extra_cache_tags": "ml-model-params,params,resnet50,ncnn,model-params" - } - ] + "group": "opset-version" }, - "pytorch,int8": { - "base": [ - "int8", - "pytorch" - ], + "opset-8": { "env": { - "CM_PACKAGE_URL": "https://zenodo.org/record/4589637/files/resnet50_INT8bit_quantized.pt" - } + "CM_ML_MODEL_ONNX_OPSET": "8" + }, + "group": "opset-version" }, "pytorch": { - "group": "framework", "env": { "CM_ML_MODEL_DATA_LAYOUT": "NCHW", "CM_ML_MODEL_FRAMEWORK": "pytorch", + "CM_ML_MODEL_GIVEN_CHANNEL_MEANS": "?", "CM_ML_MODEL_INPUT_LAYER_NAME": "input_tensor:0", + "CM_ML_MODEL_INPUT_SHAPES": "\\\"input_tensor:0\\\": [BATCH_SIZE, 3, 224, 224]", "CM_ML_MODEL_OUTPUT_LAYERS": "output", "CM_ML_MODEL_OUTPUT_LAYER_NAME": "?", - "CM_ML_MODEL_INPUT_SHAPES": "\\\"input_tensor:0\\\": [BATCH_SIZE, 3, 224, 224]", - "CM_ML_MODEL_GIVEN_CHANNEL_MEANS": "?", "CM_ML_STARTING_WEIGHTS_FILENAME": "<<>>" - } - }, - "argmax": { - "group": "model-output", - "default": true, - "env": { - "CM_ML_MODEL_OUTPUT_LAYER_ARGMAX": "yes" - } - }, - "no-argmax": { - "group": "model-output", - "env": { - "CM_ML_MODEL_OUTPUT_LAYER_ARGMAX": "no" - } - }, - "tflite,argmax": { - "env": { - "CM_ML_MODEL_INPUT_SHAPES": "\\\"input_tensor 2\\\": (BATCH_SIZE, 224, 224, 3)", - "CM_PACKAGE_URL": "https://www.dropbox.com/s/cvv2zlfo80h54uz/resnet50_v1.tflite.gz?dl=1", - "CM_DAE_EXTRACT_DOWNLOADED": "yes", - "CM_ML_MODEL_FILE": "resnet50_v1.tflite", - "CM_EXTRACT_FINAL_ENV_NAME": "CM_ML_MODEL_FILE_WITH_PATH", - "CM_DOWNLOAD_FINAL_ENV_NAME": "" - } + }, + "group": "framework" }, - "tflite,no-argmax": { + "pytorch,fp32": { "env": { - "CM_ML_MODEL_INPUT_SHAPES": "\\\"input_tensor 2\\\": (BATCH_SIZE, 224, 224, 3)", - 
"CM_PACKAGE_URL": "https://www.dropbox.com/s/vhuqo0wc39lky0a/resnet50_v1.no-argmax.tflite?dl=1", - "CM_ML_MODEL_FILE": "resnet50_v1.no-argmax.tflite" + "CM_PACKAGE_URL": "https://zenodo.org/record/4588417/files/resnet50-19c8e357.pth" } }, - "tflite,int8,no-argmax": { + "pytorch,int8": { + "base": [ + "int8", + "pytorch" + ], "env": { - "CM_ML_MODEL_INPUT_SHAPES": "\\\"input_tensor 2\\\": (BATCH_SIZE, 224, 224, 3)", - "CM_PACKAGE_URL": "https://zenodo.org/record/8234946/files/resnet50_quant_full_mlperf_edgetpu.tflite?download=1", - "CM_ML_MODEL_FILE": "resnet50_quant_full_mlperf_edgetpu.tflite", - "CM_DOWNLOAD_FINAL_ENV_NAME": "CM_ML_MODEL_FILE_WITH_PATH" + "CM_PACKAGE_URL": "https://zenodo.org/record/4589637/files/resnet50_INT8bit_quantized.pt" } }, - "tflite": { - "group": "framework", + "tensorflow": { "env": { - "CM_ML_MODEL_INPUT_SHAPES": "\\\"input_tensor 2\\\": (BATCH_SIZE, 224, 224, 3)", "CM_ML_MODEL_ACCURACY": "76.456", "CM_ML_MODEL_DATA_LAYOUT": "NHWC", - "CM_ML_MODEL_FRAMEWORK": "tflite", + "CM_ML_MODEL_FRAMEWORK": "tensorflow", "CM_ML_MODEL_GIVEN_CHANNEL_MEANS": "123.68 116.78 103.94", "CM_ML_MODEL_INPUT_LAYERS": "input_tensor", "CM_ML_MODEL_INPUT_LAYER_NAME": "input_tensor", + "CM_ML_MODEL_INPUT_SHAPES": "\\\"input_tensor:0\\\": (BATCH_SIZE, 3, 224, 224)", "CM_ML_MODEL_NORMALIZE_DATA": "0", "CM_ML_MODEL_OUTPUT_LAYERS": "softmax_tensor", "CM_ML_MODEL_OUTPUT_LAYER_NAME": "softmax_tensor", "CM_ML_MODEL_STARTING_WEIGHTS_FILENAME": "<<>>", - "CM_ML_MODEL_SUBTRACT_MEANS": "YES" + "CM_ML_MODEL_SUBTRACT_MEANS": "YES", + "CM_PACKAGE_URL": "https://zenodo.org/record/2535873/files/resnet50_v1.pb" + }, + "group": "framework" + }, + "tensorflow,fix-input-shape": { + "deps": [ + { + "names": [ + "tensorflow" + ], + "tags": "get,generic-python-lib,_package.tensorflow" + } + ], + "env": { + "CM_ML_MODEL_TF_FIX_INPUT_SHAPE": "yes" } }, "tf": { "alias": "tensorflow" }, - "tensorflow": { - "group": "framework", + "tflite": { "env": { - "CM_ML_MODEL_INPUT_SHAPES": 
"\\\"input_tensor:0\\\": (BATCH_SIZE, 3, 224, 224)", "CM_ML_MODEL_ACCURACY": "76.456", "CM_ML_MODEL_DATA_LAYOUT": "NHWC", - "CM_ML_MODEL_FRAMEWORK": "tensorflow", + "CM_ML_MODEL_FRAMEWORK": "tflite", "CM_ML_MODEL_GIVEN_CHANNEL_MEANS": "123.68 116.78 103.94", "CM_ML_MODEL_INPUT_LAYERS": "input_tensor", "CM_ML_MODEL_INPUT_LAYER_NAME": "input_tensor", + "CM_ML_MODEL_INPUT_SHAPES": "\\\"input_tensor 2\\\": (BATCH_SIZE, 224, 224, 3)", "CM_ML_MODEL_NORMALIZE_DATA": "0", "CM_ML_MODEL_OUTPUT_LAYERS": "softmax_tensor", "CM_ML_MODEL_OUTPUT_LAYER_NAME": "softmax_tensor", "CM_ML_MODEL_STARTING_WEIGHTS_FILENAME": "<<>>", - "CM_ML_MODEL_SUBTRACT_MEANS": "YES", - "CM_PACKAGE_URL": "https://zenodo.org/record/2535873/files/resnet50_v1.pb" - } - }, - "fix-input-shape": { - "deps": [ - { - "tags": "get,python3", - "names": [ - "python", - "python3" - ] - } - ] - }, - "tensorflow,fix-input-shape": { - "env": { - "CM_ML_MODEL_TF_FIX_INPUT_SHAPE": "yes" + "CM_ML_MODEL_SUBTRACT_MEANS": "YES" }, - "deps": [ - { - "tags": "get,generic-python-lib,_package.tensorflow", - "names": [ "tensorflow" ] - } - ] + "group": "framework" }, - "uint8": { - "group": "precision", + "tflite,argmax": { "env": { - "CM_ML_MODEL_INPUT_DATA_TYPES": "uint8", - "CM_ML_MODEL_PRECISION": "uint8", - "CM_ML_MODEL_WEIGHT_DATA_TYPES": "uint8" + "CM_DAE_EXTRACT_DOWNLOADED": "yes", + "CM_DOWNLOAD_FINAL_ENV_NAME": "", + "CM_EXTRACT_FINAL_ENV_NAME": "CM_ML_MODEL_FILE_WITH_PATH", + "CM_ML_MODEL_FILE": "resnet50_v1.tflite", + "CM_ML_MODEL_INPUT_SHAPES": "\\\"input_tensor 2\\\": (BATCH_SIZE, 224, 224, 3)", + "CM_PACKAGE_URL": "https://www.dropbox.com/s/cvv2zlfo80h54uz/resnet50_v1.tflite.gz?dl=1" } }, - "batch_size.1": { + "tflite,int8,no-argmax": { "env": { - "CM_ML_MODEL_BATCH_SIZE": "1" + "CM_DOWNLOAD_FINAL_ENV_NAME": "CM_ML_MODEL_FILE_WITH_PATH", + "CM_ML_MODEL_FILE": "resnet50_quant_full_mlperf_edgetpu.tflite", + "CM_ML_MODEL_INPUT_SHAPES": "\\\"input_tensor 2\\\": (BATCH_SIZE, 224, 224, 3)", + "CM_PACKAGE_URL": 
"https://zenodo.org/record/8234946/files/resnet50_quant_full_mlperf_edgetpu.tflite?download=1" } }, - "batch_size.#": { + "tflite,no-argmax": { "env": { - "CM_ML_MODEL_BATCH_SIZE": "#" + "CM_ML_MODEL_FILE": "resnet50_v1.no-argmax.tflite", + "CM_ML_MODEL_INPUT_SHAPES": "\\\"input_tensor 2\\\": (BATCH_SIZE, 224, 224, 3)", + "CM_PACKAGE_URL": "https://www.dropbox.com/s/vhuqo0wc39lky0a/resnet50_v1.no-argmax.tflite?dl=1" } + }, + "uint8": { + "env": { + "CM_ML_MODEL_INPUT_DATA_TYPES": "uint8", + "CM_ML_MODEL_PRECISION": "uint8", + "CM_ML_MODEL_WEIGHT_DATA_TYPES": "uint8" + }, + "group": "precision" } } } From 0730b2e8fea07dcd14fa31b7ea74be00c5cec1d8 Mon Sep 17 00:00:00 2001 From: Grigori Fursin Date: Sat, 3 Feb 2024 11:54:51 +0100 Subject: [PATCH 11/15] clean up --- README.md | 54 ++++++++++++++++++++------------------------------ docs/README.md | 2 +- 2 files changed, 23 insertions(+), 33 deletions(-) diff --git a/README.md b/README.md index f6e6ff68bb..a7a29c9fa8 100755 --- a/README.md +++ b/README.md @@ -17,44 +17,35 @@ ### About -**Collective Mind (CM)** is a [community project](https://github.com/mlcommons/ck/blob/master/CONTRIBUTING.md) to develop +Collective Mind (CM) is a [community project](https://github.com/mlcommons/ck/blob/master/CONTRIBUTING.md) to develop a [collection of portable, extensible, technology-agnostic and ready-to-use automation recipes with a human-friendly interface (aka CM scripts)](https://github.com/mlcommons/ck/tree/master/docs/list_of_scripts.md) -that automate all the manual steps required to build, run, benchmark and optimize complex ML/AI applications on any platform -with any software and hardware. 
- -CM scripts are being developed based on the feedback from -[MLCommons engineers and researchers](https://github.com/mlcommons/ck/blob/master/docs/taskforce.md) -to help them assemble, run, benchmark and optimize complex AI/ML applications -across diverse and continuously changing models, data sets, software and hardware -from Nvidia, Intel, AMD, Google, Qualcomm, Amazon and other vendors. +that help to automate all the manual steps required to prepare, build, run, benchmark and optimize complex ML/AI applications +on any platform with any software and hardware. They require Python 3.7+ with minimal dependencies and can run natively on Ubuntu, MacOS, Windows, RHEL, Debian, Amazon Linux and any other operating system, in a cloud or inside automatically generated containers. -Some key requirements for the CM design are: -* must be non-intrusive and easy to debug, require zero changes to existing projects and must complement, - reuse, wrap and interconnect all existing automation scripts and tools (such as cmake, ML workflows, +CM scripts were originally developed based on the following requirements from the +[MLCommons engineers and researchers](https://github.com/mlcommons/ck/blob/master/docs/taskforce.md) +to help them automatically build, benchmark and optimize complex MLPerf benchmarks +across diverse and continuously changing models, data sets, software and hardware +from Nvidia, Intel, AMD, Google, Qualcomm, Amazon and other vendors: +* must work out of the box with the default options and without the need to edit some paths, environment variables and configuration files; +* must be non-intrusive, easy to debug and must reuse existing + user scripts and automation tools (such as cmake, make, ML workflows, python poetry and containers) rather than substituting them; * must have a very simple and human-friendly command line with a Python API and minimal dependencies; * must require minimal or zero learning curve by using plain Python, native scripts, 
environment variables - and simple JSON/YAML descriptions instead of inventing new languages; -* must run in a native environment with Ubuntu, Debian, RHEL, Amazon Linux, MacOS, Windows - and any other operating system while automatically generating container snapshots - with CM recipes for repeatability and reproducibility. - -Below you can find a few examples of this collaborative engineering effort sponsored -by [MLCommons (non-profit organization with 125+ members)](https://mlcommons.org) - -a few most-commonly used [automation recipes](https://github.com/mlcommons/ck/tree/master/docs/list_of_scripts.md) -that can be chained into more complex automation workflows [using simple JSON or YAML](https://github.com/mlcommons/ck/blob/master/cm-mlops/script/app-image-classification-onnx-py/_cm.yaml). + and simple JSON/YAML descriptions instead of inventing new workflow languages; +* must have the same interface to run all automations natively, in a cloud or inside containers. -You can try them yourself (you only need Python 3.7+, PIP, git and wget installed and optionally Docker if you want to -run CM scripts via automatically-generated containers - check the [installation guide](docs/installation.md) for more details). +Below you can find and try a few examples of the most-commonly used [automation recipes](https://github.com/mlcommons/ck/tree/master/docs/list_of_scripts.md) +that can be chained into more complex automation workflows [using simple JSON or YAML](https://github.com/mlcommons/ck/blob/master/cm-mlops/script/app-image-classification-onnx-py/_cm.yaml). 
-*Note that the Collective Mind concept is to continue improving portability and functionality -of all CM automation recipes across rapidly evolving models, data sets, software and hardware -based on collaborative testing and feedback - don't hestiate to report encountered issues -[here](https://github.com/mlcommons/ck/issues) and/or contact us via [public Discord Server](https://discord.gg/JjWNWXKxwT) -to help this community effort!* +*Note that MLCommons CM is a collaborative engineering effort to gradually improve portability and functionality +across continuously changing models, data sets, software and hardware based on your feedback - +please check this [installation guide](docs/installation.md), report encountered issues [here](https://github.com/mlcommons/ck/issues) +and contact us via [public Discord Server](https://discord.gg/JjWNWXKxwT) to help this community effort!*
@@ -174,13 +165,13 @@ to modularize, run and benchmark other software projects and make it easier to rerun, reproduce and reuse [research projects from published papers at Systems and ML conferences]( https://cTuning.org/ae/micro2023.html ). -Please check the [**Getting Started Guide**](https://github.com/mlcommons/ck/blob/master/docs/getting-started.md) +Please check the [**Getting Started Guide and FAQ**](https://github.com/mlcommons/ck/blob/master/docs/getting-started.md) to understand how CM automation recipes work, how to use them to automate your own projects, and how to implement and share new automations in your public or private projects. ### Documentation -* [Getting Started Guide](docs/getting-started.md) +* [Getting Started Guide and FAQ](docs/getting-started.md) * [CM interface for MLPerf benchmarks](docs/mlperf) * [CM interface for ML and Systems conferences](docs/tutorials/common-interface-to-reproduce-research-projects.md) * [CM automation recipes for MLOps and DevOps](cm-mlops/script) @@ -204,5 +195,4 @@ our goal is to help everyone automate all manual and repetitive tasks to build, run, benchmark and optimize AI systems including downloading artifacts, installing tools, resolving dependencies, running experiments, processing logs, and reproducing results -on any software/hardware stack - you can reach us via [public Discord server](https://discord.gg/JjWNWXKxwT) -to discuss this project. +on any software/hardware stack - don't hesitate to get in touch via [public Discord server](https://discord.gg/JjWNWXKxwT)! diff --git a/docs/README.md b/docs/README.md index 31f3bda991..1812bc5ccd 100644 --- a/docs/README.md +++ b/docs/README.md @@ -9,7 +9,7 @@ The goal is to provide a common, simple and human-readable interface to manage, and customize diverse AI/ML apps, benchmarks and research projects across continuously changing models, datasets, software and hardware from different vendors in a unified and automated way. 
-* [Getting Started Guide](getting-started.md) +* [Getting Started Guide and FAQ](getting-started.md) * [Introduction](introduction-cm.md) * [CM installation and customization](installation.md) * [Unified CLI and Python API](interface.md) From 72897b636e71cc3b3844c58311dab0e44c4a023a Mon Sep 17 00:00:00 2001 From: Grigori Fursin Date: Sat, 3 Feb 2024 13:10:22 +0100 Subject: [PATCH 12/15] added support to print directories and files for a given CM entry via "cm info {automation} {artifact|--tags}" --- cm/CHANGES.md | 6 +++-- cm/README.md | 50 +++++++++++++++++++----------------------- cm/cmind/automation.py | 19 +++++++++++++++- 3 files changed, 44 insertions(+), 31 deletions(-) diff --git a/cm/CHANGES.md b/cm/CHANGES.md index 4d2f831d8e..234106a716 100644 --- a/cm/CHANGES.md +++ b/cm/CHANGES.md @@ -1,6 +1,8 @@ ## V1.6.1 - - improving --help for common automations and CM scripts (automation recipes) - - fixing a few minor bugs + - improved --help for common automations and CM scripts (automation recipes) + - fixed a few minor bugs + - added support to print directories and files for a given CM entry + via "cm info {automation} {artifact|--tags}" ## V1.6.0 - added support for Python 3.12 (removed "pkg" dependency) diff --git a/cm/README.md b/cm/README.md index 49d042d324..156870baf4 100644 --- a/cm/README.md +++ b/cm/README.md @@ -9,44 +9,35 @@ ### About -**Collective Mind (CM)** is a [community project](https://github.com/mlcommons/ck/blob/master/CONTRIBUTING.md) to develop +Collective Mind (CM) is a [community project](https://github.com/mlcommons/ck/blob/master/CONTRIBUTING.md) to develop a [collection of portable, extensible, technology-agnostic and ready-to-use automation recipes with a human-friendly interface (aka CM scripts)](https://github.com/mlcommons/ck/tree/master/docs/list_of_scripts.md) -that automate all the manual steps required to build, run, benchmark and optimize complex ML/AI applications on any platform -with any software and hardware. 
- -CM scripts are being developed based on the feedback from -[MLCommons engineers and researchers](https://github.com/mlcommons/ck/blob/master/docs/taskforce.md) -to help them assemble, run, benchmark and optimize complex AI/ML applications -across diverse and continuously changing models, data sets, software and hardware -from Nvidia, Intel, AMD, Google, Qualcomm, Amazon and other vendors. +that help to automate all the manual steps required to prepare, build, run, benchmark and optimize complex ML/AI applications +on any platform with any software and hardware. They require Python 3.7+ with minimal dependencies and can run natively on Ubuntu, MacOS, Windows, RHEL, Debian, Amazon Linux and any other operating system, in a cloud or inside automatically generated containers. -Some key requirements for the CM design are: -* must be non-intrusive and easy to debug, require zero changes to existing projects and must complement, - reuse, wrap and interconnect all existing automation scripts and tools (such as cmake, ML workflows, +CM scripts were originally developed based on the following requirements from the +[MLCommons engineers and researchers](https://github.com/mlcommons/ck/blob/master/docs/taskforce.md) +to help them automatically build, benchmark and optimize complex MLPerf benchmarks +across diverse and continuously changing models, data sets, software and hardware +from Nvidia, Intel, AMD, Google, Qualcomm, Amazon and other vendors: +* must work out of the box with the default options and without the need to edit some paths, environment variables and configuration files; +* must be non-intrusive, easy to debug and must reuse existing + user scripts and automation tools (such as cmake, make, ML workflows, python poetry and containers) rather than substituting them; * must have a very simple and human-friendly command line with a Python API and minimal dependencies; * must require minimal or zero learning curve by using plain Python, native scripts, 
environment variables - and simple JSON/YAML descriptions instead of inventing new languages; -* must run in a native environment with Ubuntu, Debian, RHEL, Amazon Linux, MacOS, Windows - and any other operating system while automatically generating container snapshots - with CM recipes for repeatability and reproducibility. - -Below you can find a few examples of this collaborative engineering effort sponsored -by [MLCommons (non-profit organization with 125+ organizations)](https://mlcommons.org) - -a few most-commonly used [automation recipes](https://github.com/mlcommons/ck/tree/master/docs/list_of_scripts.md) -that can be chained into more complex automation workflows [using simple JSON or YAML](https://github.com/mlcommons/ck/blob/master/cm-mlops/script/app-image-classification-onnx-py/_cm.yaml). + and simple JSON/YAML descriptions instead of inventing new workflow languages; +* must have the same interface to run all automations natively, in a cloud or inside containers. -You can try them yourself (you only need Python 3.7+, PIP, git and wget installed and optionally Docker if you want to -run CM scripts via automatically-generated containers - check the [installation guide](docs/installation.md) for more details). +Below you can find and try a few examples of the most-commonly used [automation recipes](https://github.com/mlcommons/ck/tree/master/docs/list_of_scripts.md) +that can be chained into more complex automation workflows [using simple JSON or YAML](https://github.com/mlcommons/ck/blob/master/cm-mlops/script/app-image-classification-onnx-py/_cm.yaml). 
-*Note that the Collective Mind concept is to continue improving portability and functionality -of all CM automation recipes across rapidly evolving models, data sets, software and hardware -based on collaborative testing and feedback - don't hestiate to report encountered issues -[here](https://github.com/mlcommons/ck/issues) and/or contact us via [public Discord Server](https://discord.gg/JjWNWXKxwT) -to help this community effort!* +*Note that MLCommons CM is a collaborative engineering effort to gradually improve portability and functionality +across continuously changing models, data sets, software and hardware based on your feedback - +please check this [installation guide](installation.md), report encountered issues [here](https://github.com/mlcommons/ck/issues) +and contact us via [public Discord Server](https://discord.gg/JjWNWXKxwT) to help this community effort!* #### CM human-friendly command line @@ -81,9 +72,12 @@ cm show cache "get ml-model stable-diffusion" cmr "get generic-python-lib _package.onnxruntime" --version_min=1.16.0 cmr "python app image-classification onnx" --input=computer_mouse.jpg +cmr "python app image-classification onnx" --input=computer_mouse.jpg --debug + cm rm cache -f cmr "python app image-classification onnx" --input=computer_mouse.jpg --adr.onnxruntime.version_max=1.16.0 + cmr "get cuda" --version_min=12.0.0 --version_max=12.3.1 cmr "python app image-classification onnx _cuda" --input=computer_mouse.jpg diff --git a/cm/cmind/automation.py b/cm/cmind/automation.py index f898423b40..297d26007b 100644 --- a/cm/cmind/automation.py +++ b/cm/cmind/automation.py @@ -1223,9 +1223,26 @@ def info(self, i): print ('CID = ' + cid) print ('CID1 = ' + cid1) print ('CID2 = ' + full_cid) + print ('') print ('Path = ' + path) - # Attempt to copy to clipboard the last CID + p_dirs=[] + p_files=[] + for p in os.listdir(path): + p_dirs.append(p) if os.path.isdir(os.path.join(path,p)) else p_files.append(p) + + if len(p_dirs)>0 or len(p_files)>0: 
+ for x in [('Directories',p_dirs), + ('Files',p_files)]: + x0 = x[0] + x1 = x[1] + if len(x1)>0: + print ('') + print (' '+x0+':') + for p in sorted(x1): + print (' ' + p) + + # Attempt to copy to clipboard the last CID if cid1 !='': clipboard = full_cid if i.get('uid', False): clipboard = cid1 From 29b571d273b90b9681fdbfa396606f04f50c8b96 Mon Sep 17 00:00:00 2001 From: Grigori Fursin Date: Sat, 3 Feb 2024 14:36:42 +0100 Subject: [PATCH 13/15] first draft of getting started guide + FAQ --- README.md | 6 +- cm-mlops/script/gui/app.py | 44 +-- docs/getting-started.md | 650 ++++++++++++++++++++++++++++++++++++- 3 files changed, 675 insertions(+), 25 deletions(-) diff --git a/README.md b/README.md index a7a29c9fa8..d276667be8 100755 --- a/README.md +++ b/README.md @@ -181,9 +181,9 @@ and how to implement and share new automations in your public or private project ### Motivation and concepts -* ACM REP'23 keynote about MLCommons CM: [slides](https://doi.org/10.5281/zenodo.8105339) -* ACM TechTalk'21 about automating research projects: [YouTube](https://www.youtube.com/watch?v=7zpeIVwICa4) -* MLPerf inference submitter orientation: [v4.0 slides](https://doi.org/10.5281/zenodo.10605079), [v3.1 slides](https://doi.org/10.5281/zenodo.8144274) +* ACM REP'23 keynote about MLCommons CM: [ [slides](https://doi.org/10.5281/zenodo.8105339) ] [ [YouTube](https://youtu.be/_1f9i_Bzjmg) ] +* ACM TechTalk'21 about automating research projects: [ [YouTube](https://www.youtube.com/watch?v=7zpeIVwICa4) ] [ [slides](https://learning.acm.org/binaries/content/assets/leaning-center/webinar-slides/2021/grigorifursin_techtalk_slides.pdf) ] +* MLPerf inference submitter orientation: [ [v4.0 slides](https://doi.org/10.5281/zenodo.10605079) ] [ [v3.1 slides](https://doi.org/10.5281/zenodo.8144274) ] ### Get in touch diff --git a/cm-mlops/script/gui/app.py b/cm-mlops/script/gui/app.py index 70e81b548f..0db457dd85 100644 --- a/cm-mlops/script/gui/app.py +++ b/cm-mlops/script/gui/app.py @@ -23,35 
+23,36 @@ def main(): no_run = os.environ.get('CM_GUI_NO_RUN', '') # Check if script tags are specified from CMD - script_tags = '' + script_tags = os.environ.get('CM_GUI_SCRIPT_TAGS','').strip() + script_tags_from_url = query_params.get('tags',['']) + if len(script_tags_from_url)>0: + x_script_tags_from_url = script_tags_from_url[0].strip() + if x_script_tags_from_url != '': + script_tags = x_script_tags_from_url meta = {} - if len(script_tags_from_url)>0: - script_tags = script_tags_from_url[0] - if script_tags !='': - # Check type of tags - if ' ' in script_tags: - script_tags = script_tags.replace(' ',',') + if script_tags !='': + # Check type of tags + if ' ' in script_tags: + script_tags = script_tags.replace(' ',',') - print ('Searching CM scripts using tags "{}"'.format(script_tags)) + print ('Searching CM scripts using tags "{}"'.format(script_tags)) - r = cmind.access({'action':'find', - 'automation':'script,5b4e0237da074764', - 'tags':script_tags}) - if r['return']>0: return r + r = cmind.access({'action':'find', + 'automation':'script,5b4e0237da074764', + 'tags':script_tags}) + if r['return']>0: return r - lst = r['list'] + lst = r['list'] + + if len(lst)==1: + script = lst[0] + meta = script.meta + script_alias = meta['alias'] - if len(lst)==1: - script = lst[0] - meta = script.meta - script_path = script.path - script_alias = meta['alias'] - if script_tags == '': - script_tags = os.environ.get('CM_GUI_SCRIPT_TAGS','') # Read meta if len(meta)==0 and script_path!='' and os.path.isdir(script_path): @@ -59,12 +60,15 @@ def main(): r = cmind.utils.load_yaml_and_json(fn) if r['return'] == 0: meta = r['meta'] + script_path = script.path + script_alias = meta['alias'] if meta.get('gui_title','')!='': title = meta['gui_title'] # Set title st.title('Collective Mind GUI') + if script_alias!='': st.markdown('*CM script: "{}"*'.format(script_alias)) diff --git a/docs/getting-started.md b/docs/getting-started.md index 32b4f10b9c..4100248410 100644 --- 
a/docs/getting-started.md +++ b/docs/getting-started.md @@ -1,4 +1,650 @@ [ [Back to documentation](README.md) ] -*20240130: we are updating this page based on the feedback from the [CM users and MLPerf submitters](https://github.com/mlcommons/ck/issues/1052) - - it should be ready within a week - please [stay tuned](https://discord.gg/JjWNWXKxwT)*. \ No newline at end of file +# Collective Mind Getting Started Guide and FAQ + +
+Click here to see the table of contents. + +* [Collective Mind Getting Started Guide and FAQ](#collective-mind-getting-started-guide-and-faq) + * [Why CM?](#why-cm) + * [CM automation recipe for image classification](#cm-automation-recipe-for-image-classification) + * [How CM scripts works?](#how-cm-scripts-works) + * [How CM runs automation recipes?](#how-cm-runs-automation-recipes) + * [How CM unifies inputs, outputs and environment variables?](#how-cm-unifies-inputs-outputs-and-environment-variables) + * [How CM chains automation recipes into portable workflows?](#how-cm-chains-automation-recipes-into-portable-workflows) + * [How to add new CM scripts?](#how-to-add-new-cm-scripts) + * [How to customize CM scripts using variations?](#how-to-customize-cm-scripts-using-variations) + * [How to cache and reuse CM scripts' output?](#how-to-cache-and-reuse-cm-scripts-output) + * [How to debug CM scripts?](#how-to-debug-cm-scripts) + * [How to extend/improve CM scripts?](#how-to-extendimprove-cm-scripts) + * [How to use CM with containers?](#how-to-use-cm-with-containers) + * [How to use CM GUI?](#how-to-use-cm-gui) + * [How to run MLPerf benchmarks via CM?](#how-to-run-mlperf-benchmarks-via-cm) + * [How to use CM to reproduce research papers?](#how-to-use-cm-to-reproduce-research-papers) + * [How to use CM to modularize any software project?](#how-to-use-cm-to-modularize-any-software-project) + * [How to get in touch with the CM community?](#how-to-get-in-touch-with-the-cm-community) +
+ + +## Why CM? + +Collective Mind (CM) is a [community project](../CONTRIBUTING.md) to develop +a [collection of portable, extensible, technology-agnostic and ready-to-use automation recipes +with a human-friendly interface (aka CM scripts)](https://github.com/mlcommons/ck/tree/master/docs/list_of_scripts.md) +that can help to automate all the manual steps required to prepare, build, run, benchmark and optimize complex ML/AI applications +on any platform with any software and hardware. +They require Python 3.7+ with minimal dependencies and can run natively on Ubuntu, MacOS, Windows, RHEL, Debian, Amazon Linux +and any other operating system, in a cloud or inside automatically generated containers. + +CM scripts were originally developed based on the following requirements from the +[MLCommons engineers and researchers](https://github.com/mlcommons/ck/blob/master/docs/taskforce.md) +to help them automatically build, benchmark and optimize complex MLPerf benchmarks +across diverse and continuously changing models, data sets, software and hardware +from Nvidia, Intel, AMD, Google, Qualcomm, Amazon and other vendors: +* must work out of the box with the default options and without the need to edit some paths, environment variables and configuration files; +* must be non-intrusive, easy to debug and must reuse existing + user scripts and automation tools (such as cmake, make, ML workflows, + python poetry and containers) rather than substituting them; +* must have a very simple and human-friendly command line with a Python API and minimal dependencies; +* must require minimal or zero learning curve by using plain Python, native scripts, environment variables + and simple JSON/YAML descriptions instead of inventing new workflow languages; +* must have the same interface to run all automations natively, in a cloud or inside containers. 
+ +Let's use a relatively simple image classification example to explain how CM achieves that +and how it helps to automate much more complex projects including [MLPerf benchmarks](mlperf) +and [reproducibility initiatives](https://cTuning.org/ae/micro2023.html) +at ML and Systems conferences. + +
+Expand to see the feedback and requirements from MLCommons researchers and engineers + + +While image classification sounds like a trivial example nowadays, it may still require many manual steps +to download some validation data sets and models, install frameworks and low-level dependencies +and update various environment variables and paths depending on your platform and target hardware +(for example CPU vs CUDA). + +You may also need to make sure that all dependencies are compatible (for example that ONNX run-time +or PyTorch framework is compatible with your CUDA version, etc). +Of course, you can also develop a container and fix all the versions but what if you or someone else +want to try a different CUDA version or newer ONNX/TF/PyTorch framework or different operating system +or different model or different data set or different framework or different hardware? + +While helping MLCommons automate [MLPerf inference benchmarks](https://github.com/mlcommons/inference) +and run them across diverse models, data sets, software and hardware, +we've realized that there is no portable and technology-agnostic automation tool +that can handle such cases. + +The feedback from [MLCommons engineers and researchers](taskforce.md) motivated us +to develop a simple automation framework that can help them +assemble, run, benchmark and optimize complex AI/ML applications +across diverse and continuously changing models, data sets, software and hardware +from Nvidia, Intel, AMD, Google, Qualcomm, Amazon and other vendors. + +
+ +## CM automation recipe for image classification + +We designed CM as a [small Python library](https://github.com/mlcommons/ck/tree/master/cm) +with a human-friendly command line, simple Python API and minimal dependencies +needed to implement automation recipes (Python 3.7+, PIP, pyyaml, git, wget) +and chain them into portable workflows. CM scripts can run natively (development mode) +or inside containers that CM generates on the fly (stable mode). + +Most of the time, these dependencies are already installed on your platform. +In such case, you should be able to prepare and run image classification with ONNX, +ImageNet validation data set and ResNet-50 on Linux, MacOS, Windows and any other +operating system using a few CM commands: + + + +```bash +pip install cmind +cm pull repo mlcommons@ck +cm run script "python app image-classification onnx _cpu" +``` + + + +*Note that you may need to re-login when you install cmind for the first time + to let your platform pick up path to the `cm` command line front-end.* + +You can also run and customize above automation recipe in alternative ways as follows: + + + +```bash +cm run script "python app image-classification onnx _cpu" --help + +cm run script "download file _wget" --url=https://cKnowledge.org/ai/data/computer_mouse.jpg --verify=no --env.CM_DOWNLOAD_CHECKSUM=45ae5c940233892c2f860efdf0b66e7e +cm run script "python app image-classification onnx _cpu" --input=computer_mouse.jpg + +cmr "python app image-classification onnx _cpu" --input=computer_mouse.jpg +cmr --tags=python,app,image-classification,onnx,_cpu --input=computer_mouse.jpg +cmr 3d5e908e472b417e --input=computer_mouse.jpg + +cm docker script "python app image-classification onnx _cpu" --input=computer_mouse.jpg + +cm gui script "python app image-classification onnx _cpu" + +``` + + + +If you encounter some issues, please check [CM installation guide](installation.md) - +if it doesn't help you, please report your issues 
[here](https://github.com/mlcommons/ck/issues)
+and/or contact us via our [public Discord server](https://discord.gg/JjWNWXKxwT) -
+CM is a [community project](../CONTRIBUTING.md) being developed
+and improved across diverse software and hardware based on your feedback!
+
+
+
+## How CM scripts works?
+
+Next, we briefly explain how CM commands work - it will help you understand
+what happens when you see similar commands in MLPerf results, README files,
+technical reports, research papers, Jupyter notebooks,
+Google colab, containers, scripts and artifact appendices.
+
+Whenever you run `cm run script "python app image-classification onnx _cpu"`
+or `cmr "python app image-classification onnx _cpu"`,
+the [CM script automation](https://github.com/mlcommons/ck/blob/master/cm-mlops/automation/script/module.py)
+will simply search for `_cm.yaml` and `_cm.json` files (CM meta-description dictionary) in all `script`
+directories in all software projects registered in CM via `cm pull repo`.
+
+In our case, we've pulled [github.com/mlcommons/ck project](https://github.com/mlcommons/ck)
+that has most MLCommons' CM automation recipes embedded
+in a [`cm-mlops/script` directory](https://github.com/mlcommons/ck/tree/master/cm-mlops/script).
+
+*Note that you can pull any public or private Git repository, download any software project
+ or register any local directory in the CM to search for embedded automation recipes.*
+
+CM will then try to match all your tags without `_` prefix (`_` in tags mark
+the so-called CM script variations that customize a given script's behavior
+and will be described later) with a `tags` list in the CM meta-description dictionary.
+In our case, it will match the corresponding [`_cm.yaml`](https://github.com/mlcommons/ck/blob/master/cm-mlops/script/app-image-classification-onnx-py/_cm.yaml#L9)
+in `$HOME/CM/repos/mlcommons@ck/script/app-image-classification-onnx-py/_cm.yaml` -
+a wrapper for a given CM automation recipe.
+ +*Note that if you use unique ID instead of tags to identify automation (such as `3d5e908e472b417e`), + CM will try to match `uid` string in the CM meta descriptions instead of tags.* + + +## How CM runs automation recipes? + +Whenever CM finds a directory with a requested automation recipe, +it performs the following steps: +* run `preprocess` function in `customize.py` if exists +* run `run.sh` (Linux) or `run.bat` (Windows) if exists +* run `postprocess` function in `customize.py` if exists + +Such organization makes it possible to use either Python or native OS scripts or +both to implement CM automation recipes while minimizing the learning curve +for CM understanding, development and debugging as requested by CM users. + +Furthermore, CM scripts can keep the source code of +image classification (as shown [here](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/app-image-classification-onnx-py/src)) +that we can easily move around +between projects without hardwiring paths and names. + +## How CM unifies inputs, outputs and environment variables? + +CM allows you to pass environment variables to `customize.py` +and native scripts using `--env.ENV=VALUE`. + +When you use some flags such as `--input` in our image classification +example, it will be also converted into an environment variable +using [`input_mapping` dictionary](https://github.com/mlcommons/ck/blob/master/cm-mlops/script/app-image-classification-onnx-py/_cm.yaml#L78) +in the CM meta description of this script. + +All environment variables are aggregated in `env` dictionary inside CM +and then passed to `preprocess` function in `customize.py` where you can modify +it programmatically. + +They are then passed to the `run` script. 
Since new environment variables +are not preserved after `run` script, one can pass new environment variables +back to CM using `tmp-run-env.out` with ENV=KEY strings as shown [here](https://github.com/mlcommons/ck/blob/master/cm-mlops/script/app-image-classification-onnx-py/run.sh#L37) +or using `tmp-run-state.json` as shown [here](https://github.com/mlcommons/ck/blob/master/cm-mlops/script/app-image-classification-onnx-py/src/onnx_classify.py#L171). + +## How CM chains automation recipes into portable workflows? + +CM scripts provide a technology-agnostic wrapper with simple tags, CLI and Python API to prepare and run +user code snippets and native scripts/tools while unifying their inputs and outputs, paths and environment variables. + +Such architecture makes it possible to easily chain existing user scripts and tools into portable, technology-agnostic and powerful workflows +instead of substituting or rewriting them. + +It is possible to chain CM scripts using simple +[`deps` list](https://github.com/mlcommons/ck/blob/master/cm-mlops/script/app-image-classification-onnx-py/_cm.yaml#L21) +in a meta description of a given script: + + + +```yaml +deps: +- tags: detect,os +- tags: get,sys-utils-cm +- names: + - python + - python3 + tags: get,python3 + +- tags: get,cuda + names: + - cuda + enable_if_env: + USE_CUDA: + - yes +- tags: get,cudnn + names: + - cudnn + enable_if_env: + USE_CUDA: + - yes + +- tags: get,dataset,imagenet,image-classification,original +- tags: get,dataset-aux,imagenet-aux,image-classification +- tags: get,ml-model,resnet50,_onnx,image-classification + names: + - ml-model + +- tags: get,generic-python-lib,_package.Pillow +- tags: get,generic-python-lib,_package.numpy +- tags: get,generic-python-lib,_package.opencv-python + + +- tags: get,generic-python-lib,_onnxruntime + names: + - onnxruntime + skip_if_env: + USE_CUDA: + - yes +- tags: get,generic-python-lib,_onnxruntime_gpu + names: + - onnxruntime + enable_if_env: + USE_CUDA: + - yes + 
+``` + + + +Each entry in this list is a dictionary that specifies which CM script to run using `tags`. +Internally, CM will be updating `env` dictionary (flat environment) and `state` dictionary +(to let scripts exchange complex data structures besides environment variables). + +If you run CM via command line, you can see internal `env` and `state` dictionaries by adding `-j` flag: + +```bash +cmr "python app image-classification onnx _cpu" --input=computer_mouse.jpg -j +``` + +*Note that we use similar approach for updating environment variables similar + to calling native scripts - by default, they do not alter environment + variables at the host. However, CM allows you to do that + by explicitly specifying which environment variables and state keys + will be updated at the host using `new_env_keys` and `new_state_keys` + in the meta of a given script as shown [here](https://github.com/mlcommons/ck/blob/master/cm-mlops/script/app-image-classification-onnx-py/_cm.yaml#L83). + This helped us make behavior of complex CM workflows more deterministic + and reproducible.* + +Each sub-dependency can be turned on or off using environment variables +using `enable_if_env` dictionary or `disable_if_env` dictionary. + +You can also specify `version_min`, `version_max` and `version` in these +dependencies. You can also give them some specific names such as `python` +and pass versions and environment variables only to a specific script in a pipeline as follows: +```bash +cmr "python app image-classification onnx _cpu" --input=computer_mouse.jpg --adr.python.version_min=3.9 +``` + +This functionality is usually implemented inside ad-hoc bash or shell scripts +with many hardwired paths and names - CM simply makes such scripts and tools +portable and reusable while enabling technology-agnostic automation workflows +with a unified interface that can adapt to any operating system and are easy +to understand. 
+
+We can now assemble complex automation workflows by reusing all portable
+scripts from [the community](https://github.com/mlcommons/ck/blob/master/docs/list_of_scripts.md).
+
+In our example, we reused CM scripts to [detect OS features](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/detect-os),
+install system dependencies on [any supported OS](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/get-sys-utils-cm)
+(Ubuntu, MacOS, RHEL, Arch, Debian, SLES, Windows, etc),
+detect or install Python and PIP packages, download and preprocess data sets and models, etc.
+
+
+
+## How to add new CM scripts?
+
+One of the main requirements for CM was to provide very light-weight connectors
+between existing automation scripts and tools rather than substituting them.
+
+You can add your own scripts and tools to CM using the following command
+that will create a ready-to-use dummy CM script:
+
+```bash
+cm add script my-script --tags=my,script
+```
+
+You can already run this dummy script and plug it into other CM workflows:
+```bash
+cmr "my script"
+```
+
+You can also run it from Python as follows:
+```python
+import cmind
+output=cmind.access({'action':'run',
+                     'automation':'script',
+                     'tags':'my,script'})
+if output['return']==0: print (output)
+```
+
+
+## How to customize CM scripts using variations?
+
+Sometimes we need to set multiple environment variables or run a set of extra CM scripts
+for a specific purpose (different hardware target or model or dataset).
+
+We introduced special tags with `_`, called *variations* or *variation tags*,
+that allow you to update a set of environment variables and add extra scripts
+to the chain of dependencies.
+
+Such variations are defined using [`variations` dictionary](https://github.com/mlcommons/ck/blob/master/cm-mlops/script/app-image-classification-onnx-py/_cm.yaml#L66)
+in the meta description of a given CM script.
+
+For example, our script has 2 variations `_cuda` and `_cpu`.
+ +If you want to use CUDA implementation of the image classification example, +you can add this variation to the tags that will set `USE_CUDA` environment to `yes` +and will turn on a specific CM script in `deps` to install ONNX for CUDA: + +```bash +cmr "python app image-classification onnx _cuda" --input=computer_mouse.jpg +``` + +## How to cache and reuse CM scripts' output? + +By default, CM scripts run in the current directory and record all new files there. + +For example, the following universal download script will download +computer mouse image to the current directory: + + + +```bash +cm run script "download file _wget" --url=https://cKnowledge.org/ai/data/computer_mouse.jpg --verify=no --env.CM_DOWNLOAD_CHECKSUM=45ae5c940233892c2f860efdf0b66e7e +``` + + + +In some cases, we want to cache and reuse the output of automation recipes (such as downloading models, preprocessing data sets or building some applications) +rather than just downloading it to the current directory. + +Following the feedback from our users, we implemented a `cache` automation in CM similar to `script`. +Whenever CM encounters `"cache":true` in a meta description of a given script, it will create +a `cache` directory in `$HOME/CM/repos/local` with some unique ID and the same tags as `script`, +and will execute that script there to record all the data in cache. + +Whenever the same CM script is executed and CM finds an associated cache entry, +it will skip execution and will reuse files from that entry. + +Furthermore, it is possible to reuse large cached files in other projects that call the same CM scripts! 
+
+You can see cache entries and find a specific one as follows:
+
+```bash
+cmr "get ml-model resnet50 _onnx" -j
+
+cm show cache
+cm show cache "get ml-model resnet50 _onnx"
+cm find cache "download file ml-model resnet50 _onnx"
+cm info cache "download file ml-model resnet50 _onnx"
+```
+
+You can clean some cache entries as follows:
+```bash
+cm rm cache --tags=ml-model,resnet50
+```
+
+You can also clean all CM `cache` entries and start from scratch as follows:
+```bash
+cm rm cache -f
+```
+
+In fact, you can remove `$HOME/CM` to reset CM framework completely
+and remove all downloaded repositories and cached entries.
+
+
+
+## How to use CM with Python virtual environments?
+
+
+Using CM `cache` makes it possible to run CM automations for multiple virtual environments
+installed inside CM `cache` entries. It is possible to run CM automations with different Python
+virtual environments transparently to users while avoiding messing up native user environment.
+
+We created the following CM automation recipe to create virtual environments:
+
+```bash
+cmr "install python-venv" --name=mlperf
+cm show cache "python-venv name-mlperf"
+export CM_SCRIPT_EXTRA_CMD="--adr.python.name=mlperf"
+```
+
+If you now run our image classification automation recipe,
+it will reuse the model and dataset from the cache, but will
+use the new `mlperf` Python virtual environment to run the script.
+
+
+## How to debug CM scripts?
+
+One of the requirements from CM users was to avoid new and/or complex ways to debug CM automations.
+Using native scripts and Python code makes it possible to apply standard techniques and tools to debug CM automations.
+
+We were also asked to add `--debug` flag to open a shell after the last native script is executed -
+this allows users to rerun the last command line with all environment variables and paths assembled by CM
+while having a full and native access to change environment and run the final command
+(such as pinning threads, changing batch sizes, modifying files, etc).
+
+You can try it on Linux, MacOS, Windows or any other operating system as follows:
+
+```bash
+cmr "python app image-classification onnx _cpu" --input=computer_mouse.jpg --debug
+
+```
+
+You can also use GDB via environment variable `--env.CM_RUN_PREFIX="gdb --args "`
+to run the final command via GDB.
+
+
+
+## How to extend/improve CM scripts?
+
+CM is a [community project](../CONTRIBUTING.md) where [CM scripts](https://github.com/mlcommons/ck/tree/master/cm-mlops/script)
+are continuously improved to run on different hardware with different software
+while keeping backward compatibility through the unified CM interface, tags and variations.
+
+Whenever you encounter an issue or want to have support for your own project and environment,
+please update these scripts and send a PR to the [CM GitHub](https://github.com/mlcommons/ck).
+
+You can also reach us via [public Discord server](https://discord.gg/JjWNWXKxwT)
+if you have questions or suggestions.
+
+
+
+
+
+## How to use CM with containers?
+
+One of the key requirements for CM was to run automation natively or inside containers in the same way.
+
+We want CM scripts to adapt to the current/latest environment natively or run in the
+container automatically generated on the fly when requested by user for more stability and determinism.
+
+In such case, we can get rid of separate development of native scripts/workflows and Dockerfile
+and use the same CM commands instead.
+
+To run a given script in an automatically-generated container, you can simply substitute `cm run script`
+with `cm docker script` or `cmr` with `cmrd`:
+
+```bash
+cm docker script "python app image-classification onnx _cpu"
+```
+
+CM will automatically generate a Dockerfile with Ubuntu 22.04 in the `dockerfiles`
+directory of a given script, will build container with the same CM command
+and will run it inside container.
+
+* If you want to stay in the container, you can add flag `--docker_it`.
+* You can change OS inside container using `--docker_base_image`, `--docker_os` and `--docker_os_version`. + +The tricky part is when we want to use host files and directories with a given CM script inside container. +To make it easier for users, we have implemented automatic detection and mounting of files and directories +in CM script. + +Developers of a CM script just need to specify which flags and environment variables are local files or directories +using `input_paths` in `docker` dictionary of the meta-description of this script: + +```yaml +docker: + skip_run_cmd: 'no' + all_gpus: 'yes' + input_paths: + - input + - env.CM_IMAGE + - output + skip_input_for_fake_run: + - input + - env.CM_IMAGE + - output + - j + pre_run_cmds: + - echo \"CM pre run commands\" +``` + +When you run the same script via container with the local computer_mouse.jpg file as an input, +CM will automatically mount current directory and will update the input to the CM script +inside container with the internal path: + + + +```bash +cm docker script "python app image-classification onnx _cpu" --input=computer_mouse.jpg + +... + +docker build -f D:\Work1\CM\ck\cm-mlops\script\app-image-classification-onnx-py\dockerfiles\ubuntu_22.04.Dockerfile \ + -t cknowledge/cm-script-app-image-classification-onnx-py:ubuntu-22.04-latest . + +... + +Container launch command: +docker run --entrypoint "" --gpus=all -v D:\Work1\CM\ck\docs\computer_mouse.jpg:/cm-mount/Work1/CM/ck/docs/computer_mouse.jpg + cknowledge/cm-script-app-image-classification-onnx-py:ubuntu-22.04-latest + bash -c "echo \"CM pre run commands\" && + cm run script --tags=python,app,image-classification,onnx,_cpu + --input=/cm-mount/Work1/CM/ck/docs/computer_mouse.jpg " + +CM pre run commands + + +``` + + + +It is now possible to download large data sets and models to the host from CM containers +or pass host scratch pads and data to CM containers transparently to a user! + + + +## How to use CM GUI to run automation recipes? 
+
+Another request from CM/MLCommons users was to have a simple GUI that can generate CM commands with user-friendly selector.
+
+We've implemented a CM script called [`gui`](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/gui)
+that provides a universal Streamlit GUI for any CM script.
+
+You just need to describe the inputs for a given script via [meta-description](https://github.com/mlcommons/ck/blob/master/cm-mlops/script/app-image-classification-onnx-py/_cm.yaml#L91)
+as shown for our image classification example:
+
+```yaml
+input_description:
+  input:
+    desc: "Path to JPEG image to classify"
+  output:
+    desc: "Output directory (optional)"
+  j:
+    desc: "Print JSON output"
+    boolean: true
+```
+
+You can run this GUI for your CM script as follows:
+```bash
+cm gui script "python app image-classification onnx _cpu"
+```
+
+This GUI will allow you to customize your script and run it on your host.
+
+
+## How to run MLPerf benchmarks via CM?
+
+CM was originally designed to make it easier to run [MLPerf inference benchmarks](https://arxiv.org/abs/1911.02549).
+
+While MLPerf inference has a common benchmarking engine called [loadgen](https://github.com/mlcommons/inference/tree/master/loadgen),
+setting up a given platform, installing all tools, downloading and preprocessing all models and data sets,
+updating paths and environment variables, figuring out default parameters for various scenarios, preparing a loadgen command line,
+keeping track of continuous updates in MLPerf rules, running multiple experiments and submitting results
+is a major challenge for old and new submitters (see [MLPerf inference v4.0 submitter orientation for automation](https://doi.org/10.5281/zenodo.10605079)).
+
+We created several CM scripts to prepare and run different implementations of MLPerf inference (reference, Nvidia, Intel, Qualcomm, Deep Sparse, etc)
+with a master CM script to run them all out-of-the-box natively or inside automatically-generated containers
+[run-mlperf-inference-app](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/run-mlperf-inference-app).
+CM helped us to implement it as a simple pipeline with a common and human-friendly interface while reusing all existing automation recipes.
+
+This script was successfully validated to [modularize MLPerf inference benchmarks](https://github.com/mlcommons/ck/blob/master/docs/mlperf/inference/README.md)
+and help the community automate more than 95% of all performance and power submissions in the v3.1 round
+across more than 120 system configurations (models, frameworks, hardware)
+while reducing development and maintenance costs.
+
+Please check this [documentation](mlperf/inference) for more details.
+
+
+## How to use CM to reproduce research papers?
+
+Following the successful validation of the CM concept to modularize and run MLPerf inference benchmarks across diverse software and hardware,
+the community tested it to make it easier to reproduce results from research papers during artifact evaluation and other reproducibility
+initiatives at [systems conferences](https://ctuning.org/ae/micro2023.html).
+
+The idea is to provide a common interface to prepare and run experiments from research papers.
+See the latest CM scripts to rerun some experiments from the [ACM/IEEE MICRO'23 conference](https://github.com/ctuning/cm-reproduce-research-projects/tree/main/script)
+and from the [Student Cluster Competition at Supercomputing'23](tutorials/scc23-mlperf-inference-bert.md).
+
+
+## How to use CM as a common interface to other projects?
+
+While CM was successfully validated to unify, modularize and automate MLPerf benchmarks,
+it is applicable to any software project.
+ +The community started using CM automation recipes as a common and human-friendly interface +to run other software projects and manage experiments across diverse models, data sets, software and hardware +while making them more modular, portable and reusable. + +Please check [other CM tutorials](tutorials), [CM documentation](README.md) and our [ACM REP'23 keynote](https://www.youtube.com/watch?v=7zpeIVwICa4) +for more details. + + +## Where to read about the CM vision and history? + +* ACM REP'23 keynote about MLCommons CM: [slides](https://doi.org/10.5281/zenodo.8105339) [YouTube](https://youtu.be/_1f9i_Bzjmg) +* ACM TechTalk'21 about automating research projects: [YouTube](https://www.youtube.com/watch?v=7zpeIVwICa4) [slides](https://learning.acm.org/binaries/content/assets/leaning-center/webinar-slides/2021/grigorifursin_techtalk_slides.pdf) +* [Project history](history.md) + + +## How to get in touch with the CM community? + +This is a community project being developed by the [MLCommons Task Force on Automation and Reproducibility](taskforce.md) +based on your feedback and [contributions](../CONTRIBUTING.md) - please join our [public Discord server](https://discord.gg/JjWNWXKxwT) if you +would like to help with developments or have questions, suggestions and feature requests. From ced63963739c573517bff97d0aef6af34591024a Mon Sep 17 00:00:00 2001 From: Grigori Fursin Date: Sat, 3 Feb 2024 14:37:12 +0100 Subject: [PATCH 14/15] cleaned TOC --- docs/getting-started.md | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/docs/getting-started.md b/docs/getting-started.md index 4100248410..72cd11b2fa 100644 --- a/docs/getting-started.md +++ b/docs/getting-started.md @@ -5,7 +5,7 @@
Click here to see the table of contents. -* [Collective Mind Getting Started Guide](#collective-mind-getting-started-guide) +* [Collective Mind Getting Started Guide and FAQ](#collective-mind-getting-started-guide-and-faq) * [Why CM?](#why-cm?) * [CM automation recipe for image classification](#cm-automation-recipe-for-image-classification) * [How CM scripts works?](#how-cm-scripts-works?) @@ -15,13 +15,15 @@ * [How to add new CM scripts?](#how-to-add-new-cm-scripts?) * [How to customize CM scripts using variations?](#how-to-customize-cm-scripts-using-variations?) * [How to cache and reuse CM scripts' output?](#how-to-cache-and-reuse-cm-scripts'-output?) + * [How to use CM with Python virtual environments?](#how-to-use-cm-with-python-virtual-environments?) * [How to debug CM scripts?](#how-to-debug-cm-scripts?) * [How to extend/improve CM scripts?](#how-to-extend/improve-cm-scripts?) * [How to use CM with containers?](#how-to-use-cm-with-containers?) - * [How to use CM GUI?](#how-to-use-cm-gui?) + * [How to use CM GUI to run automation recipes?](#how-to-use-cm-gui-to-run-automation-recipes?) * [How to run MLPerf benchmarks via CM?](#how-to-run-mlperf-benchmarks-via-cm?) * [How to use CM to reproduce research papers?](#how-to-use-cm-to-reproduce-research-papers?) - * [How to use CM to modularize any software project?](#how-to-use-cm-to-modularize-any-software-project?) + * [How to use CM as a common interface to other projects?](#how-to-use-cm-as-a-common-interface-to-other-projects?) + * [Where to read about the CM vision and history?](#where-to-read-about-the-cm-vision-and-history?) * [How to get in touch with the CM community?](#how-to-get-in-touch-with-the-cm-community?)
From 2a241cf1fcf0c1c13ddce3fa0f25cc6ab9781a60 Mon Sep 17 00:00:00 2001 From: Grigori Fursin Date: Sat, 3 Feb 2024 16:06:33 +0100 Subject: [PATCH 15/15] clean up --- README.md | 2 +- docs/getting-started.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index d276667be8..f1201a017a 100755 --- a/README.md +++ b/README.md @@ -18,7 +18,7 @@ ### About Collective Mind (CM) is a [community project](https://github.com/mlcommons/ck/blob/master/CONTRIBUTING.md) to develop -a [collection of portable, extensible, technology-agnostic and ready-to-use automation recipes +a [collection of portable, extensible, technology-agnostic and ready-to-use automation recipes for MLOps and DevOps with a human-friendly interface (aka CM scripts)](https://github.com/mlcommons/ck/tree/master/docs/list_of_scripts.md) that help to automate all the manual steps required to prepare, build, run, benchmark and optimize complex ML/AI applications on any platform with any software and hardware. diff --git a/docs/getting-started.md b/docs/getting-started.md index 72cd11b2fa..474817329a 100644 --- a/docs/getting-started.md +++ b/docs/getting-started.md @@ -33,7 +33,7 @@ Collective Mind (CM) is a [community project](../CONTRIBUTING.md) to develop a [collection of portable, extensible, technology-agnostic and ready-to-use automation recipes -with a human-friendly interface (aka CM scripts)](https://github.com/mlcommons/ck/tree/master/docs/list_of_scripts.md) +for MLOps and DevOps with a human-friendly interface (aka CM scripts)](https://github.com/mlcommons/ck/tree/master/docs/list_of_scripts.md) that can help to automate all the manual steps required to prepare, build, run, benchmark and optimize complex ML/AI applications on any platform with any software and hardware. They require Python 3.7+ with minimal dependencies and can run natively on Ubuntu, MacOS, Windows, RHEL, Debian, Amazon Linux