From 4b390fc5829d62c89bc44a85d7611710ee9493e5 Mon Sep 17 00:00:00 2001 From: Arjun Date: Sat, 10 Feb 2024 21:22:19 +0000 Subject: [PATCH 01/33] Fixes for intel-gptj --- cm-mlops/script/get-ml-model-gptj/_cm.json | 2 +- cm-mlops/script/reproduce-mlperf-inference-intel/_cm.yaml | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/cm-mlops/script/get-ml-model-gptj/_cm.json b/cm-mlops/script/get-ml-model-gptj/_cm.json index ef23620480..57a0b40615 100644 --- a/cm-mlops/script/get-ml-model-gptj/_cm.json +++ b/cm-mlops/script/get-ml-model-gptj/_cm.json @@ -181,7 +181,6 @@ }, "wget": { "group": "download-tool", - "default": true, "add_deps_recursive": { "dae": { "tags": "_wget" @@ -194,6 +193,7 @@ }, "rclone": { "group": "download-tool", + "default": true, "add_deps_recursive": { "dae": { "tags": "_rclone" diff --git a/cm-mlops/script/reproduce-mlperf-inference-intel/_cm.yaml b/cm-mlops/script/reproduce-mlperf-inference-intel/_cm.yaml index 82aa963a7e..4d2f305e79 100644 --- a/cm-mlops/script/reproduce-mlperf-inference-intel/_cm.yaml +++ b/cm-mlops/script/reproduce-mlperf-inference-intel/_cm.yaml @@ -295,9 +295,11 @@ variations: - tags: get,generic-python-lib,_package.datasets names: - pip-package + - datasets - tags: get,generic-python-lib,_package.accelerate names: - pip-package + - accelerate gptj-99: group: model From 0d025ca1cadc63c1153990233d5a07af9acc4975 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Sat, 10 Feb 2024 22:01:03 +0000 Subject: [PATCH 02/33] Not use cuda for pytorch cpu build --- cm-mlops/script/install-llvm-src/_cm.json | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/cm-mlops/script/install-llvm-src/_cm.json b/cm-mlops/script/install-llvm-src/_cm.json index 7e02fa00ab..d55eccff47 100644 --- a/cm-mlops/script/install-llvm-src/_cm.json +++ b/cm-mlops/script/install-llvm-src/_cm.json @@ -296,7 +296,9 @@ ], "env": { "CM_LLVM_CONDA_ENV": "yes", - "CM_LLVM_16_INTEL_MLPERF_INFERENCE": "yes" + "CM_LLVM_16_INTEL_MLPERF_INFERENCE": "yes", + "USE_CUDA": "0", + "CUDA_VISIBLE_DEVICES": "" } } }, From 18bc3d0ead6b8c5224885d95b3295a44c1a7c7fa Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Sun, 11 Feb 2024 03:54:49 +0530 Subject: [PATCH 03/33] Improvements for intel mlperf docker run --- cm-mlops/script/app-mlperf-inference/_cm.yaml | 20 +++++++++++++++++++ .../script/install-pytorch-from-src/_cm.json | 3 ++- .../reproduce-mlperf-inference-intel/_cm.yaml | 2 ++ 3 files changed, 24 insertions(+), 1 deletion(-) diff --git a/cm-mlops/script/app-mlperf-inference/_cm.yaml b/cm-mlops/script/app-mlperf-inference/_cm.yaml index cb31003de3..65ac2aace3 100644 --- a/cm-mlops/script/app-mlperf-inference/_cm.yaml +++ b/cm-mlops/script/app-mlperf-inference/_cm.yaml @@ -245,6 +245,10 @@ variations: backend: onnxruntime nvidia-original: + docker: + extra_run_args: ' --runtime=nvidia --ulimit memlock=-1 --cap-add SYS_ADMIN --cap-add SYS_TIME --security-opt apparmor=unconfined --security-opt seccomp=unconfined' + base_image: nvcr.io/nvidia/mlperf/mlperf-inference:mlpinf-v3.1-cuda12.2-cudnn8.9-x86_64-ubuntu20.04-l4-public + docker:os_version: "20.04" default_variations: backend: tensorrt device: cuda @@ -1128,3 +1132,19 @@ input_description: gui: title: "CM GUI for the MLPerf inference benchmark" + +docker: + skip_run_cmd: 'no' + shm_size: '32gb' + extra_run_args: ' --ulimit memlock=-1 --cap-add SYS_ADMIN --cap-add SYS_TIME --security-opt apparmor=unconfined --security-opt seccomp=unconfined' + docker_os: ubuntu + docker_real_run: False + interactive: True + 
docker_os_version: '22.04' + docker_input_mapping: + imagenet_path: IMAGENET_PATH + gptj_checkpoint_path: GPTJ_CHECKPOINT_PATH + criteo_preprocessed_path: CRITEO_PREPROCESSED_PATH + results_dir: RESULTS_DIR + submission_dir: SUBMISSION_DIR + dlrm_data_path: DLRM_DATA_PATH diff --git a/cm-mlops/script/install-pytorch-from-src/_cm.json b/cm-mlops/script/install-pytorch-from-src/_cm.json index bf9288b7d9..f22b09fbfa 100644 --- a/cm-mlops/script/install-pytorch-from-src/_cm.json +++ b/cm-mlops/script/install-pytorch-from-src/_cm.json @@ -109,7 +109,8 @@ }, "env": { "CM_CONDA_ENV": "yes", - "CM_MLPERF_INFERENCE_INTEL": "yes" + "CM_MLPERF_INFERENCE_INTEL": "yes", + "USE_CUDA": "0" }, "deps": [ { diff --git a/cm-mlops/script/reproduce-mlperf-inference-intel/_cm.yaml b/cm-mlops/script/reproduce-mlperf-inference-intel/_cm.yaml index 4d2f305e79..7658e20e6b 100644 --- a/cm-mlops/script/reproduce-mlperf-inference-intel/_cm.yaml +++ b/cm-mlops/script/reproduce-mlperf-inference-intel/_cm.yaml @@ -38,6 +38,8 @@ default_env: env: CM_CALL_MLPERF_RUNNER: 'no' + CUDA_VISIBLE_DEVICES: '' + USE_CUDA: '0' # Map script inputs to environment variables input_mapping: From aa65e087e2a54f50300d5786bf34b86f76ccf846 Mon Sep 17 00:00:00 2001 From: Grigori Fursin Date: Sun, 11 Feb 2024 15:53:20 +0100 Subject: [PATCH 04/33] improving gui to reproduce benchmarks --- .../benchmark-hardware-compute/amd-gpu.json | 3 +- .../generic-cpu-arm64.json | 3 +- .../generic-cpu-x64.json | 3 +- .../google-tpu.json | 3 +- .../nvidia-gpu-jetson-orin.json | 3 +- .../nvidia-gpu.json | 3 +- .../qualcomm-ai100.json | 5 +- .../any-model-offline.yaml | 7 - ...el-offline.md => any-model-x64-offline.md} | 0 .../any-model-x64-offline.yaml | 5 + .../any-model-offline.yaml | 4 +- ...inanet-reference-python-torch-offline.yaml | 3 +- .../_cm.yaml | 1 + .../_cm.yaml | 26 ++ .../base/_test.yaml | 3 + .../bert-reference-python-onnx-offline.md | 1 + .../bert-reference-python-onnx-offline.yaml | 11 + .../_cm.yaml | 1 + .../base/_demo.yaml | 2 + .../bert-qaic-offline.md | 1 + .../bert-qaic-offline.yaml | 10 + .../bert-reference-python-onnx-offline.md | 1 + .../bert-reference-python-onnx-offline.yaml | 11 +- .../gptj-reference-python-torch-offline.md | 1 + .../gptj-reference-python-torch-offline.yaml | 16 +- .../_cm.yaml | 1 + .../llama2-reference-python-torch-offline.md | 1 + ...llama2-reference-python-torch-offline.yaml | 9 +- cm-mlops/script/gui/playground_howtorun.py | 2 +- cm-mlops/script/launch-benchmark/customize.py | 365 ++++++++++++++---- 30 files changed, 407 insertions(+), 98 deletions(-) delete mode 100644 cm-mlops/cfg/benchmark-run-loadgen-cpp-dev/any-model-offline.yaml rename cm-mlops/cfg/benchmark-run-loadgen-cpp-dev/{any-model-offline.md => any-model-x64-offline.md} (100%) create mode 100644 cm-mlops/cfg/benchmark-run-loadgen-cpp-dev/any-model-x64-offline.yaml create mode 100644 cm-mlops/cfg/benchmark-run-mlperf-inference-scc23/_cm.yaml create mode 100644 cm-mlops/cfg/benchmark-run-mlperf-inference-scc23/base/_test.yaml create mode 100644 cm-mlops/cfg/benchmark-run-mlperf-inference-scc23/bert-reference-python-onnx-offline.md create mode 100644 cm-mlops/cfg/benchmark-run-mlperf-inference-scc23/bert-reference-python-onnx-offline.yaml create mode 100644 cm-mlops/cfg/benchmark-run-mlperf-inference-v3.1/base/_demo.yaml create mode 100644 cm-mlops/cfg/benchmark-run-mlperf-inference-v3.1/bert-qaic-offline.md create mode 100644 cm-mlops/cfg/benchmark-run-mlperf-inference-v3.1/bert-qaic-offline.yaml create mode 100644 
cm-mlops/cfg/benchmark-run-mlperf-inference-v3.1/bert-reference-python-onnx-offline.md create mode 100644 cm-mlops/cfg/benchmark-run-mlperf-inference-v3.1/gptj-reference-python-torch-offline.md create mode 100644 cm-mlops/cfg/benchmark-run-mlperf-inference-v4.0/llama2-reference-python-torch-offline.md diff --git a/cm-mlops/cfg/benchmark-hardware-compute/amd-gpu.json b/cm-mlops/cfg/benchmark-hardware-compute/amd-gpu.json index 546794e597..d9c3ae64ed 100644 --- a/cm-mlops/cfg/benchmark-hardware-compute/amd-gpu.json +++ b/cm-mlops/cfg/benchmark-hardware-compute/amd-gpu.json @@ -1,4 +1,5 @@ { "uid": "d8f06040f7294319", - "name": "AMD GPU" + "name": "AMD GPU", + "tags": "gpu,amd" } diff --git a/cm-mlops/cfg/benchmark-hardware-compute/generic-cpu-arm64.json b/cm-mlops/cfg/benchmark-hardware-compute/generic-cpu-arm64.json index 869c62397a..2f72d88103 100644 --- a/cm-mlops/cfg/benchmark-hardware-compute/generic-cpu-arm64.json +++ b/cm-mlops/cfg/benchmark-hardware-compute/generic-cpu-arm64.json @@ -1,4 +1,5 @@ { "uid":"357a972e79614903", - "name": "Generic CPU - Arm64" + "name": "Generic CPU - Arm64", + "tags": "cpu,arm64,generic" } diff --git a/cm-mlops/cfg/benchmark-hardware-compute/generic-cpu-x64.json b/cm-mlops/cfg/benchmark-hardware-compute/generic-cpu-x64.json index 05531cb49e..6573ca1a42 100644 --- a/cm-mlops/cfg/benchmark-hardware-compute/generic-cpu-x64.json +++ b/cm-mlops/cfg/benchmark-hardware-compute/generic-cpu-x64.json @@ -1,4 +1,5 @@ { "uid": "cdfd424c32734e38", - "name": "Generic CPU - x64" + "name": "Generic CPU - x64", + "tags": "cpu,x64,generic" } diff --git a/cm-mlops/cfg/benchmark-hardware-compute/google-tpu.json b/cm-mlops/cfg/benchmark-hardware-compute/google-tpu.json index 59296cc2de..dbcf9c70e9 100644 --- a/cm-mlops/cfg/benchmark-hardware-compute/google-tpu.json +++ b/cm-mlops/cfg/benchmark-hardware-compute/google-tpu.json @@ -1,4 +1,5 @@ { "uid": "b3be7ac9ef954f5a", - "name": "Google TPU" + "name": "Google TPU", + "tags": "tpu,google" } diff --git a/cm-mlops/cfg/benchmark-hardware-compute/nvidia-gpu-jetson-orin.json b/cm-mlops/cfg/benchmark-hardware-compute/nvidia-gpu-jetson-orin.json index cd15f38c9a..8c0f86f2d3 100644 --- a/cm-mlops/cfg/benchmark-hardware-compute/nvidia-gpu-jetson-orin.json +++ b/cm-mlops/cfg/benchmark-hardware-compute/nvidia-gpu-jetson-orin.json @@ -1,4 +1,5 @@ { "uid": "fe379ecd1e054a00", - "name": "Nvidia GPU - Jetson Orin" + "name": "Nvidia GPU - Jetson Orin", + "tags": "gpu,nvidia,jetson,orin" } diff --git a/cm-mlops/cfg/benchmark-hardware-compute/nvidia-gpu.json b/cm-mlops/cfg/benchmark-hardware-compute/nvidia-gpu.json index f8fecaf95b..2cdeb0eefa 100644 --- a/cm-mlops/cfg/benchmark-hardware-compute/nvidia-gpu.json +++ b/cm-mlops/cfg/benchmark-hardware-compute/nvidia-gpu.json @@ -1,4 +1,5 @@ { "uid": "fe379ecd1e054a00", - "name": "Nvidia GPU" + "name": "Nvidia GPU", + "tags": "gpu,nvidia" } diff --git a/cm-mlops/cfg/benchmark-hardware-compute/qualcomm-ai100.json b/cm-mlops/cfg/benchmark-hardware-compute/qualcomm-ai100.json index 83e18562f4..33b3ac8abd 100644 --- a/cm-mlops/cfg/benchmark-hardware-compute/qualcomm-ai100.json +++ b/cm-mlops/cfg/benchmark-hardware-compute/qualcomm-ai100.json @@ -1,4 +1,5 @@ { - "uid": "fe379ecd1e054a00", - "name": "Qualcomm - AI 100" + "uid": "d2ae645066664463", + "name": "Qualcomm - AI 100", + "tags": "accelerator,acc,qualcomm,ai,100,ai-100" } diff --git a/cm-mlops/cfg/benchmark-run-loadgen-cpp-dev/any-model-offline.yaml b/cm-mlops/cfg/benchmark-run-loadgen-cpp-dev/any-model-offline.yaml deleted file mode 100644 
index 6f2d290046..0000000000 --- a/cm-mlops/cfg/benchmark-run-loadgen-cpp-dev/any-model-offline.yaml +++ /dev/null @@ -1,7 +0,0 @@ -uid: 125abafe58dc4473 - -name: "Any model - offline" - -supported_compute: -- cdfd424c32734e38 -- 357a972e79614903 diff --git a/cm-mlops/cfg/benchmark-run-loadgen-cpp-dev/any-model-offline.md b/cm-mlops/cfg/benchmark-run-loadgen-cpp-dev/any-model-x64-offline.md similarity index 100% rename from cm-mlops/cfg/benchmark-run-loadgen-cpp-dev/any-model-offline.md rename to cm-mlops/cfg/benchmark-run-loadgen-cpp-dev/any-model-x64-offline.md diff --git a/cm-mlops/cfg/benchmark-run-loadgen-cpp-dev/any-model-x64-offline.yaml b/cm-mlops/cfg/benchmark-run-loadgen-cpp-dev/any-model-x64-offline.yaml new file mode 100644 index 0000000000..1d848ec00a --- /dev/null +++ b/cm-mlops/cfg/benchmark-run-loadgen-cpp-dev/any-model-x64-offline.yaml @@ -0,0 +1,5 @@ +uid: 125abafe58dc4473 + +name: "Any model - x64 - offline" + +compute_uid: cdfd424c32734e38 diff --git a/cm-mlops/cfg/benchmark-run-loadgen-python-dev/any-model-offline.yaml b/cm-mlops/cfg/benchmark-run-loadgen-python-dev/any-model-offline.yaml index aefd1822f4..677ce45f05 100644 --- a/cm-mlops/cfg/benchmark-run-loadgen-python-dev/any-model-offline.yaml +++ b/cm-mlops/cfg/benchmark-run-loadgen-python-dev/any-model-offline.yaml @@ -2,6 +2,4 @@ uid: db45dcd686854602 name: "Any model - offline" -supported_compute: -- cdfd424c32734e38 -- 357a972e79614903 +compute_uid: cdfd424c32734e38 diff --git a/cm-mlops/cfg/benchmark-run-mlperf-abtf-dev/retinanet-reference-python-torch-offline.yaml b/cm-mlops/cfg/benchmark-run-mlperf-abtf-dev/retinanet-reference-python-torch-offline.yaml index 55bbec3719..f1fe61593f 100644 --- a/cm-mlops/cfg/benchmark-run-mlperf-abtf-dev/retinanet-reference-python-torch-offline.yaml +++ b/cm-mlops/cfg/benchmark-run-mlperf-abtf-dev/retinanet-reference-python-torch-offline.yaml @@ -2,5 +2,4 @@ uid: "fe379ecd1e054a00" name: "RetinaNet Reference Python Torch Offline" -supported_compute: -- cdfd424c32734e38 +compute_uid: cdfd424c32734e38 diff --git a/cm-mlops/cfg/benchmark-run-mlperf-inference-latest/_cm.yaml b/cm-mlops/cfg/benchmark-run-mlperf-inference-latest/_cm.yaml index 11c0c31277..ea33ddc6ba 100644 --- a/cm-mlops/cfg/benchmark-run-mlperf-inference-latest/_cm.yaml +++ b/cm-mlops/cfg/benchmark-run-mlperf-inference-latest/_cm.yaml @@ -16,6 +16,7 @@ name: "MLPerf inference - latest" supported_compute: - 357a972e79614903 - cdfd424c32734e38 +- d2ae645066664463 urls: - name: "Official page" diff --git a/cm-mlops/cfg/benchmark-run-mlperf-inference-scc23/_cm.yaml b/cm-mlops/cfg/benchmark-run-mlperf-inference-scc23/_cm.yaml new file mode 100644 index 0000000000..1f0c1a9ec0 --- /dev/null +++ b/cm-mlops/cfg/benchmark-run-mlperf-inference-scc23/_cm.yaml @@ -0,0 +1,26 @@ +alias: benchmark-run-mlperf-inference-scc23 +uid: 9133e5b1dddc4e4a + +automation_alias: cfg +automation_uid: 88dce9c160324c5d + +tags: +- benchmark +- run +- mlperf +- inference +- v3.1 + +name: "MLPerf inference - Student Cluster Competition 2023" + +supported_compute: +- fe379ecd1e054a00 +- cdfd424c32734e38 +- fe379ecd1e054a00 +- d2ae645066664463 + +urls: +- name: "Official page" + url: "https://sc23.supercomputing.org/students/student-cluster-competition/" +- name: "Tutorial to run MLPerf inference benchmark " + url: "https://github.com/mlcommons/ck/blob/master/docs/tutorials/scc23-mlperf-inference-bert.md" diff --git a/cm-mlops/cfg/benchmark-run-mlperf-inference-scc23/base/_test.yaml 
b/cm-mlops/cfg/benchmark-run-mlperf-inference-scc23/base/_test.yaml new file mode 100644 index 0000000000..2869ed71b6 --- /dev/null +++ b/cm-mlops/cfg/benchmark-run-mlperf-inference-scc23/base/_test.yaml @@ -0,0 +1,3 @@ +name: "BASE" + +tags: "base" diff --git a/cm-mlops/cfg/benchmark-run-mlperf-inference-scc23/bert-reference-python-onnx-offline.md b/cm-mlops/cfg/benchmark-run-mlperf-inference-scc23/bert-reference-python-onnx-offline.md new file mode 100644 index 0000000000..a0990367ef --- /dev/null +++ b/cm-mlops/cfg/benchmark-run-mlperf-inference-scc23/bert-reference-python-onnx-offline.md @@ -0,0 +1 @@ +TBD diff --git a/cm-mlops/cfg/benchmark-run-mlperf-inference-scc23/bert-reference-python-onnx-offline.yaml b/cm-mlops/cfg/benchmark-run-mlperf-inference-scc23/bert-reference-python-onnx-offline.yaml new file mode 100644 index 0000000000..7b612a2c39 --- /dev/null +++ b/cm-mlops/cfg/benchmark-run-mlperf-inference-scc23/bert-reference-python-onnx-offline.yaml @@ -0,0 +1,11 @@ +uid: 9eee8cb06621413a + +name: "BERT Reference Python ONNX Offline" + +compute_uid: cdfd424c32734e38 + +input: + model: bert + implementation: reference + framework: onnx + diff --git a/cm-mlops/cfg/benchmark-run-mlperf-inference-v3.1/_cm.yaml b/cm-mlops/cfg/benchmark-run-mlperf-inference-v3.1/_cm.yaml index f59559bcd8..8e890e6c7c 100644 --- a/cm-mlops/cfg/benchmark-run-mlperf-inference-v3.1/_cm.yaml +++ b/cm-mlops/cfg/benchmark-run-mlperf-inference-v3.1/_cm.yaml @@ -17,6 +17,7 @@ supported_compute: - fe379ecd1e054a00 - cdfd424c32734e38 - fe379ecd1e054a00 +- d2ae645066664463 urls: - name: "Official page" diff --git a/cm-mlops/cfg/benchmark-run-mlperf-inference-v3.1/base/_demo.yaml b/cm-mlops/cfg/benchmark-run-mlperf-inference-v3.1/base/_demo.yaml new file mode 100644 index 0000000000..6c2c3145cd --- /dev/null +++ b/cm-mlops/cfg/benchmark-run-mlperf-inference-v3.1/base/_demo.yaml @@ -0,0 +1,2 @@ +# DEMO +base_demo: true diff --git a/cm-mlops/cfg/benchmark-run-mlperf-inference-v3.1/bert-qaic-offline.md b/cm-mlops/cfg/benchmark-run-mlperf-inference-v3.1/bert-qaic-offline.md new file mode 100644 index 0000000000..a0990367ef --- /dev/null +++ b/cm-mlops/cfg/benchmark-run-mlperf-inference-v3.1/bert-qaic-offline.md @@ -0,0 +1 @@ +TBD diff --git a/cm-mlops/cfg/benchmark-run-mlperf-inference-v3.1/bert-qaic-offline.yaml b/cm-mlops/cfg/benchmark-run-mlperf-inference-v3.1/bert-qaic-offline.yaml new file mode 100644 index 0000000000..96d66d8bcd --- /dev/null +++ b/cm-mlops/cfg/benchmark-run-mlperf-inference-v3.1/bert-qaic-offline.yaml @@ -0,0 +1,10 @@ +uid: c6ae695138e74a29 + +name: "BERT QAIC Offline" + +compute_uid: cdfd424c32734e38 + +input: + model: bert + implementation: qaic + framework: qaic diff --git a/cm-mlops/cfg/benchmark-run-mlperf-inference-v3.1/bert-reference-python-onnx-offline.md b/cm-mlops/cfg/benchmark-run-mlperf-inference-v3.1/bert-reference-python-onnx-offline.md new file mode 100644 index 0000000000..a0990367ef --- /dev/null +++ b/cm-mlops/cfg/benchmark-run-mlperf-inference-v3.1/bert-reference-python-onnx-offline.md @@ -0,0 +1 @@ +TBD diff --git a/cm-mlops/cfg/benchmark-run-mlperf-inference-v3.1/bert-reference-python-onnx-offline.yaml b/cm-mlops/cfg/benchmark-run-mlperf-inference-v3.1/bert-reference-python-onnx-offline.yaml index b3a9f0dd1c..7b612a2c39 100644 --- a/cm-mlops/cfg/benchmark-run-mlperf-inference-v3.1/bert-reference-python-onnx-offline.yaml +++ b/cm-mlops/cfg/benchmark-run-mlperf-inference-v3.1/bert-reference-python-onnx-offline.yaml @@ -2,7 +2,10 @@ uid: 9eee8cb06621413a name: "BERT Reference 
Python ONNX Offline" -supported_compute: -- cdfd424c32734e38 -- 357a972e79614903 -- fe379ecd1e054a00 +compute_uid: cdfd424c32734e38 + +input: + model: bert + implementation: reference + framework: onnx + diff --git a/cm-mlops/cfg/benchmark-run-mlperf-inference-v3.1/gptj-reference-python-torch-offline.md b/cm-mlops/cfg/benchmark-run-mlperf-inference-v3.1/gptj-reference-python-torch-offline.md new file mode 100644 index 0000000000..a0990367ef --- /dev/null +++ b/cm-mlops/cfg/benchmark-run-mlperf-inference-v3.1/gptj-reference-python-torch-offline.md @@ -0,0 +1 @@ +TBD diff --git a/cm-mlops/cfg/benchmark-run-mlperf-inference-v3.1/gptj-reference-python-torch-offline.yaml b/cm-mlops/cfg/benchmark-run-mlperf-inference-v3.1/gptj-reference-python-torch-offline.yaml index e3cc190aaf..5a56d16d3d 100644 --- a/cm-mlops/cfg/benchmark-run-mlperf-inference-v3.1/gptj-reference-python-torch-offline.yaml +++ b/cm-mlops/cfg/benchmark-run-mlperf-inference-v3.1/gptj-reference-python-torch-offline.yaml @@ -1,6 +1,16 @@ +_base: "base/_demo.yaml" + uid: 53e4028a3b31400d name: "GPT-J Reference Python Torch Offline" + +compute_uid: cdfd424c32734e38 + +tags: ",offline" + +input: + model: gptj + implementation: reference + framework: torch -supported_compute: -- fe379ecd1e054a00 + \ No newline at end of file diff --git a/cm-mlops/cfg/benchmark-run-mlperf-inference-v4.0/_cm.yaml b/cm-mlops/cfg/benchmark-run-mlperf-inference-v4.0/_cm.yaml index 2be6c36da5..9fe5d48726 100644 --- a/cm-mlops/cfg/benchmark-run-mlperf-inference-v4.0/_cm.yaml +++ b/cm-mlops/cfg/benchmark-run-mlperf-inference-v4.0/_cm.yaml @@ -17,6 +17,7 @@ supported_compute: - fe379ecd1e054a00 - cdfd424c32734e38 - fe379ecd1e054a00 +- d2ae645066664463 urls: - name: "Official page" diff --git a/cm-mlops/cfg/benchmark-run-mlperf-inference-v4.0/llama2-reference-python-torch-offline.md b/cm-mlops/cfg/benchmark-run-mlperf-inference-v4.0/llama2-reference-python-torch-offline.md new file mode 100644 index 0000000000..6b81d9cd00 --- /dev/null +++ b/cm-mlops/cfg/benchmark-run-mlperf-inference-v4.0/llama2-reference-python-torch-offline.md @@ -0,0 +1 @@ +# TBD \ No newline at end of file diff --git a/cm-mlops/cfg/benchmark-run-mlperf-inference-v4.0/llama2-reference-python-torch-offline.yaml b/cm-mlops/cfg/benchmark-run-mlperf-inference-v4.0/llama2-reference-python-torch-offline.yaml index 831aa0347b..63653a9633 100644 --- a/cm-mlops/cfg/benchmark-run-mlperf-inference-v4.0/llama2-reference-python-torch-offline.yaml +++ b/cm-mlops/cfg/benchmark-run-mlperf-inference-v4.0/llama2-reference-python-torch-offline.yaml @@ -1,6 +1,9 @@ +_base: "benchmark-run-mlperf-inference-v3.1,8eb42e27ec984185:base/_demo.yaml" + uid: 4df38ed8dd804678 -name: "LLAMA2 Reference Python Torch Offline" +name: " Offline" + +compute_uid: fe379ecd1e054a00 -supported_compute: -- fe379ecd1e054a00 +tags: ",llama2,offline" diff --git a/cm-mlops/script/gui/playground_howtorun.py b/cm-mlops/script/gui/playground_howtorun.py index db1d927163..083dc10918 100644 --- a/cm-mlops/script/gui/playground_howtorun.py +++ b/cm-mlops/script/gui/playground_howtorun.py @@ -8,7 +8,7 @@ import streamlit as st -announcement = 'Under development - please follow the [MLCommons Collective Mind automation project](https://github.com/mlcommons/ck) and stay tuned for more details ...' +announcement = 'Under development - please get in touch via [Discord](https://discord.gg/JjWNWXKxwT) for more details ...'
initialized = False external_module_path = '' diff --git a/cm-mlops/script/launch-benchmark/customize.py b/cm-mlops/script/launch-benchmark/customize.py index 8baf9f2cf8..7c326909e2 100644 --- a/cm-mlops/script/launch-benchmark/customize.py +++ b/cm-mlops/script/launch-benchmark/customize.py @@ -1,5 +1,9 @@ import cmind import os +import copy + +base_path={} +base_path_meta={} ################################################################################## def preprocess(i): @@ -46,6 +50,7 @@ def load_cfg(i): prune = i.get('prune',{}) prune_key = prune.get('key', '') + prune_key_uid = prune.get('key_uid', '') prune_uid = prune.get('uid', '') prune_list = prune.get('list',[]) @@ -59,11 +64,21 @@ def load_cfg(i): meta['full_path']=full_path - selection.append(meta) + add = True + + if prune_key!='' and prune_key_uid!='': + if prune_key_uid not in meta.get(prune_key, []): + add = False + + if add: + selection.append(meta) else: for l in lst: path = l.path + main_meta = l.meta + all_tags = main_meta.get('tags',[]) + files = os.listdir(path) for f in files: @@ -83,30 +98,132 @@ def load_cfg(i): else: meta = r['meta'] + # Check base + r = process_base(meta, full_path) + if r['return']>0: return r + meta = r['meta'] + uid = meta['uid'] # Check pruning add = True - if prune_uid!='' and uid != prune_uid: - add = False - - if add and prune_key!='' and len(prune_list)>0 and uid not in prune_list: - add = False + if len(prune)>0: + if prune_uid!='' and uid != prune_uid: + add = False + + if add and len(prune_list)>0 and uid not in prune_list: + add = False + + if add and prune_key!='' and prune_key_uid!='' and prune_key_uid != meta.get(prune_key, None): + add = False if add: meta['full_path']=full_path + add_all_tags = copy.deepcopy(all_tags) + name = meta.get('name','') if name=='': name = ' '.join(meta.get('tags',[])) name = name.strip() meta['name'] = name - + + file_tags = meta.get('tags', '').strip() + if file_tags=='': + if name!='': + add_all_tags += [v.lower() for v in name.split(' ')] + else: + add_all_tags += file_tags.split(',') + + meta['all_tags']=add_all_tags + + meta['main_meta']=main_meta + selection.append(meta) return {'return':0, 'lst':lst, 'selection':selection} +################################################################################## +def process_base(meta, full_path): + + global base_path, base_path_meta + + _base = meta.get('_base', '') + if _base != '': + name = '' + + filename = _base + full_path_base = os.path.dirname(full_path) + + if not filename.endswith('.yaml') and not filename.endswith('.json'): + return {'return':1, 'error':'_base file {} in {} must be .yaml or .json'.format(filename, full_path)} + + if ':' in _base: + x = _base.split(':') + name = x[0] + + full_path_base = base_path.get(name, '') + if full_path_base == '': + + # Find artifact + r = cmind.access({'action':'find', + 'automation':'cfg', + 'artifact':name}) + if r['return']>0: return r + + lst = r['list'] + + if len(lst)==0: + return {'return':1, 'error':'_base artifact {} not found in {}'.format(name, full_path)} + + full_path_base = lst[0].path + + base_path[name] = full_path_base + + filename = x[1] + + # Load base + path = os.path.join(full_path_base, filename) + + if not os.path.isfile(path): + return {'return':1, 'error':'_base file {} not found in {}'.format(filename, full_path)} + + if path in base_path_meta: + base = copy.deepcopy(base_path_meta[path]) + else: + path_without_ext = path[:-5] + + r = cmind.utils.load_yaml_and_json(path_without_ext) + if
r['return']>0: return r + + base = r['meta'] + + base_path_meta[path]=copy.deepcopy(base) + + for k in meta: + v = meta[k] + + if k not in base: + base[k]=v + else: + if isinstance(v, str): + # Only merge a few special keys and overwrite the rest + if k in ['tags','name']: + base[k] += meta[k] + else: + base[k] = meta[k] + + elif type(v) == list: + for vv in v: + base[k].append(vv) + elif type(v) == dict: + base[k].update(v) + + meta = base + + return {'return':0, 'meta':meta} ################################################################################## def gui(i): @@ -130,82 +247,137 @@ def gui(i): x = params.get('uid',['']) if len(x)>0 and x[0]!='': uid = x[0].strip() + bench_uid = '' + x = params.get('bench_uid',['']) + if len(x)>0 and x[0]!='': bench_uid = x[0].strip() + + compute_uid = '' + x = params.get('compute_uid',['']) + if len(x)>0 and x[0]!='': compute_uid = x[0].strip() - # Preparing state - if 'bench_id' not in st.session_state: st.session_state['bench_id']=0 - if 'compute_id' not in st.session_state: st.session_state['compute_id']=0 - ############################################################## # Check the first level of benchmarks - bench_id = 0 - - ii = {'tags':'benchmark,run', 'skip_files':True} + ii = {'tags':'benchmark,run', 'skip_files':True, 'prune':{}} if uid != '': ii['skip_files'] = False - ii['prune']={'uid':uid} + ii['prune']['uid']=uid + if bench_uid !='': + ii['artifact']=bench_uid + if compute_uid !='': + ii['prune']['key']='supported_compute' + ii['prune']['key_uid']=compute_uid r=load_cfg(ii) if r['return']>0: return r lst = r['selection'] + + if len(lst)==0: + st.markdown('Warning: no benchmarks found!') + return {'return':0} test_meta = {} + bench_id = 0 + + + + if uid != '': + if len(lst)==0: + st.markdown('CM test with UID "{}" not found!'.format(uid)) + return {'return':0} + elif len(lst)>1: + st.markdown('Warning: More than 1 CM test found with UID "{}" - ambiguity!'.format(uid)) + return {'return':0} + + test_meta = lst[0] + + bench_id = 1 + compute_uid = test_meta['compute_uid'] + bench_supported_compute = [compute_uid] + + if uid == '': selection = sorted(lst, key = lambda v: v['name']) bench_selection = [{'name':''}] + selection + bench_id_index = 0 if bench_uid == '' else 1 + bench_id = st.selectbox('Select benchmark:', range(len(bench_selection)), format_func=lambda x: bench_selection[x]['name'], - index = 0, + index = bench_id_index, key = 'bench') bench_supported_compute = [] bench_meta = {} - if bench_id != st.session_state['bench_id']: + if bench_id>0: bench_meta = bench_selection[bench_id] bench_supported_compute = bench_meta.get('supported_compute',[]) - urls = bench_meta.get('urls',[]) - if len(urls)>0: - x = '\n' - for u in urls: - name = u['name'] - url = u['url'] - - x+=' [ [{}]({}) ] '.format(name, url) - x+='\n' - st.markdown(x) + urls = bench_meta.get('urls',[]) + if len(urls)>0: + x = '\n' + for u in urls: + name = u['name'] + url = u['url'] + x+=' [ [{}]({}) ] '.format(name, url) + x+='\n' + st.markdown(x) + + if True==True: ############################################################## # Check compute - r=load_cfg({'tags':'benchmark,compute', - 'prune':{'key':'supported_compute', 'list':bench_supported_compute}}) - if r['return']>0: return r + ii = {'tags':'benchmark,compute'} + if bench_id>0: + if compute_uid !='': + x = [compute_uid] + else: + x = bench_supported_compute + if len(x) == 0: + st.markdown('Warning: 
no supported compute selected!') + return {'return':0} + + ii['prune']={'list':x} + r=load_cfg(ii) + if r['return']>0: return r - # Creating compute selector - compute_id = st.selectbox('Select target hardware:', - range(len(compute_selection)), - format_func=lambda x: compute_selection[x]['name'], - index = 0, - key = 'compute') + selection = sorted(r['selection'], key = lambda v: v['name']) - if compute_id!=st.session_state['compute_id']: - st.session_state['compute_id']=compute_id + if len(selection) == 0 : + st.markdown('Warning: no supported compute found!') + return {'return':0} + + compute_selection = [{'name':''}] + if len(selection)>0: + compute_selection += selection - try: - st.rerun() - except: - st.experimental_rerun() + compute_id_index = 0 if compute_uid == '' else 1 + + if uid == '': + compute_id = st.selectbox('Select target hardware:', + range(len(compute_selection)), + format_func=lambda x: compute_selection[x]['name'], + index = compute_id_index, + key = 'compute') + + compute = {} + if compute_id>0: + compute = compute_selection[compute_id] + compute_uid = compute['uid'] + + compute_meta = {} + for c in compute_selection: + if c.get('uid','')!='': + compute_meta[c['uid']]=c + if uid == '': ############################################################## # Check tests @@ -214,7 +386,8 @@ def gui(i): if bench_id>0: bench_uid = bench_selection[bench_id]['uid'] ii['artifact']=bench_uid - + if compute_uid!='': + ii['prune']={'key':'compute_uid', 'key_uid':compute_uid} r=load_cfg(ii) if r['return']>0: return r @@ -222,43 +395,97 @@ def gui(i): selection = sorted(r['selection'], key = lambda v: v['name']) # Check how many and prune - if len(selection)>1: + if len(selection) == 0: + st.markdown('No CM tests found') + return {'return':0} + + for s in selection: + c_uid = s.get('compute_uid','') + if c_uid!='': + c_tags = compute_meta[c_uid].get('tags','') + if c_tags!='': + s['all_tags']+=c_tags.split(',') - test_tags = st.text_input('Found {} CM tests. Prune them by tags:'.format(str(len(selection))), value='', key='test_tags').strip() + s['compute_meta']=compute_meta[c_uid] + + if len(selection)>1: + # Update selection with compute tags + test_tags = '' + x = params.get('tags',['']) + if len(x)>0 and x[0]!='': test_tags = x[0].strip() + test_tags = st.text_input('Found {} CM tests. 
Prune them by tags:'.format(str(len(selection))), value=test_tags, key='test_tags').strip() + if test_tags!='': + test_tags_list = test_tags.replace(' ',',').split(',') + pruned_selection = [] + + for s in selection: + all_tags = s['all_tags'] + + add = True + + for t in test_tags_list: + if t not in all_tags: + add = False + break + + if add: + pruned_selection.append(s) - + selection = pruned_selection + test_selection = [{'name':''}] + selection - # Creating compute selector - test_id = st.selectbox('Select test:', - range(len(test_selection)), - format_func=lambda x: test_selection[x]['name'], - index = 0, - key = 'test') - + if len(selection)<200: + # Creating compute selector + test_id_index = 1 if len(selection)==1 else 0 + + test_id = st.selectbox('Select a test from {}:'.format(str(len(selection))), + range(len(test_selection)), + format_func=lambda x: test_selection[x]['name'], + index = test_id_index, + key = 'test') + + + if test_id >0: + test_meta = test_selection[test_id] + else: + ######################################################################### + # View many (table) + st.markdown('---') + + for s in selection: + st.markdown('* '+str(s)) + + + + + + - if test_id >0: - test_meta = test_selection[test_id] - - else: - if len(lst)==0: - st.markdown('CM test with UID "{}" not found!'.format(uid)) - return {'return':0} - elif len(lst)>1: - st.markdown('Warning: More than 1 CM test found with UID "{}" - ambiguity!'.format(uid)) - return {'return':0} - test_meta = lst[0] ############################################################## + # Show individual test if len(test_meta)>0: - st.markdown('---') + if uid != '': + c_uid = test_meta.get('compute_uid','') + if c_uid!='': + c_tags = compute_meta[c_uid].get('tags','') + if c_tags!='': + test_meta['all_tags']+=c_tags.split(',') + + test_meta['compute_meta']=compute_meta[c_uid] + + + if uid == '': + st.markdown('---') + st.markdown(str(test_meta)) test_path = test_meta['full_path'] @@ -277,5 +504,9 @@ def gui(i): + + + + return {'return':0} From fc2cc8824b83afa12bc42825c814cbfb04738e3e Mon Sep 17 00:00:00 2001 From: Grigori Fursin Date: Sun, 11 Feb 2024 16:37:15 +0100 Subject: [PATCH 05/33] started working on test table / matrix --- .../bert-reference-python-onnx-offline.yaml | 2 +- cm-mlops/script/gui/playground_howtorun.py | 3 +- cm-mlops/script/launch-benchmark/customize.py | 114 ++++++++++++++++-- 3 files changed, 107 insertions(+), 12 deletions(-) diff --git a/cm-mlops/cfg/benchmark-run-mlperf-inference-scc23/bert-reference-python-onnx-offline.yaml b/cm-mlops/cfg/benchmark-run-mlperf-inference-scc23/bert-reference-python-onnx-offline.yaml index 7b612a2c39..fdc291b8be 100644 --- a/cm-mlops/cfg/benchmark-run-mlperf-inference-scc23/bert-reference-python-onnx-offline.yaml +++ b/cm-mlops/cfg/benchmark-run-mlperf-inference-scc23/bert-reference-python-onnx-offline.yaml @@ -1,4 +1,4 @@ -uid: 9eee8cb06621413a +uid: 35e8895a1b714ed3 name: "BERT Reference Python ONNX Offline" diff --git a/cm-mlops/script/gui/playground_howtorun.py b/cm-mlops/script/gui/playground_howtorun.py index 083dc10918..0c57b02549 100644 --- a/cm-mlops/script/gui/playground_howtorun.py +++ b/cm-mlops/script/gui/playground_howtorun.py @@ -56,6 +56,7 @@ def page(st, params, action = ''): ii = {'streamlit_module': st, 'params': params, 'meta': external_module_meta, - 'skip_title': True} + 'skip_title': True, + 'misc_module': misc} return cmind.utils.call_internal_module(None, external_module_path , 'customize', 'gui', ii) diff --git 
a/cm-mlops/script/launch-benchmark/customize.py b/cm-mlops/script/launch-benchmark/customize.py index 7c326909e2..b7f8694723 100644 --- a/cm-mlops/script/launch-benchmark/customize.py +++ b/cm-mlops/script/launch-benchmark/customize.py @@ -225,15 +225,74 @@ def process_base(meta, full_path): return {'return':0, 'meta':meta} +################################################################################## +def prepare_table(i): + + import pandas as pd + import numpy as np + + selection = i['selection'] + misc = i['misc_module'] + + html = '' + + all_data = [] + keys = [ + ('x1', 'Var1', 400, 'leftAligned'), + ('x2', 'Var2', 80,'rightAligned'), + ('x3', 'Var3', 80, ''), + ] + + for s in selection: + row = {} + + uid = s['uid'] + + row['x1']=uid + row['x2']=s['name'] + + url = misc.make_url(uid, key='uid', action='howtorun', md=False) + + row['x3']='<a href="{}" target="_blank">See test</a>'.format(url) + + all_data.append(row) + + + # Visualize table + pd_keys = [v[0] for v in keys] + pd_key_names = [v[1] for v in keys] + pd_all_data = [] + for row in sorted(all_data, key=lambda row: (row.get('x1',0))): + pd_row=[] + for k in pd_keys: + pd_row.append(row.get(k)) + pd_all_data.append(pd_row) + + df = pd.DataFrame(pd_all_data, columns = pd_key_names) + + df.index+=1 + + html=df.to_html(escape=False, justify='left') + + return {'return':0, 'html':html} + + + + + + ################################################################################## def gui(i): params = i['params'] st = i['streamlit_module'] + misc = i['misc_module'] meta = i['meta'] gui_meta = meta['gui'] skip_header = i.get('skip_title', False) - + + end_html = '' + if not skip_header: # Title title = gui_meta['title'] @@ -282,7 +341,7 @@ def gui(i): bench_id = 0 - + ########################################################################################################### if uid != '': if len(lst)==0: st.markdown('CM test with UID "{}" not found!'.format(uid)) @@ -302,8 +361,22 @@ def gui(i): selection = sorted(lst, key = lambda v: v['name']) bench_selection = [{'name':''}] + selection - bench_id_index = 0 if bench_uid == '' else 1 + if bench_uid !='': + bench_id_index = 1 + else: + # Check if want to force some benchmark by default + # 27c06c35bceb4059 == MLPerf inference v4.0 + + bench_id_index = 0 + j=0 + for b in bench_selection: + if b.get('uid','')=='27c06c35bceb4059': + bench_id_index=j + break + j+=1 + + bench_id = st.selectbox('Select benchmark:', range(len(bench_selection)), format_func=lambda x: bench_selection[x]['name'], index = bench_id_index, key = 'bench') @@ -459,8 +534,13 @@ def gui(i): else: ######################################################################### # View many (table) st.markdown('---') - for s in selection: - st.markdown('* '+str(s)) + r = prepare_table({'selection':selection, + 'misc_module':misc}) + if r['return']>0: return r + + html=r['html'] + st.write(html, unsafe_allow_html = True) @@ -486,8 +566,9 @@ def gui(i): if uid == '': st.markdown('---') - st.markdown(str(test_meta)) + uid = test_meta['uid'] + + # First, check if there is a README test_path = test_meta['full_path'] test_md = test_meta['full_path'][:-5]+'.md' @@
-498,15 +579,28 @@ def gui(i): s = r['string'] - st.markdown('---') - st.markdown(s) + # Next print some info (for now JSON) + import json + x = """ +--- +**CM test dictionary:** +```json +{} +``` + """.format(json.dumps(test_meta, indent=2)) + st.markdown(x) + + + # Create self link + # This misc module is in CM "gui" script + x1 = misc.make_url(uid, key='uid', action='howtorun', md=False) + end_html='
<a href="{}" target="_blank">Self link</a>
'.format(x1) - - return {'return':0} + return {'return':0, 'end_html': end_html} From 9b7abba6739babcb0edfe5b5da5ee8a77d03c18c Mon Sep 17 00:00:00 2001 From: Grigori Fursin Date: Sun, 11 Feb 2024 20:12:23 +0100 Subject: [PATCH 06/33] adding table with badges for CM tests --- .../_cm.yaml | 8 + .../_cm.yaml | 8 + .../bert-qaic-offline.yaml | 4 + .../bert-reference-python-onnx-offline.yaml | 2 + .../gptj-reference-python-torch-offline.yaml | 1 + .../_cm.yaml | 8 + cm-mlops/script/launch-benchmark/customize.py | 153 ++++++++++++++++-- 7 files changed, 167 insertions(+), 17 deletions(-) diff --git a/cm-mlops/cfg/benchmark-run-mlperf-inference-latest/_cm.yaml b/cm-mlops/cfg/benchmark-run-mlperf-inference-latest/_cm.yaml index ea33ddc6ba..3793099289 100644 --- a/cm-mlops/cfg/benchmark-run-mlperf-inference-latest/_cm.yaml +++ b/cm-mlops/cfg/benchmark-run-mlperf-inference-latest/_cm.yaml @@ -25,3 +25,11 @@ urls: url: "https://github.com/mlcommons/inference" - name: "MLCommons CM automation (under development)" url: "https://github.com/mlcommons/ck/tree/master/docs/mlperf/inference" + +dimensions: +- - input.model + - "MLPerf model" +- - input.implementation + - "MLPerf implementation" +- - input.framework + - "MLPerf framework" diff --git a/cm-mlops/cfg/benchmark-run-mlperf-inference-v3.1/_cm.yaml b/cm-mlops/cfg/benchmark-run-mlperf-inference-v3.1/_cm.yaml index 8e890e6c7c..5258d892d5 100644 --- a/cm-mlops/cfg/benchmark-run-mlperf-inference-v3.1/_cm.yaml +++ b/cm-mlops/cfg/benchmark-run-mlperf-inference-v3.1/_cm.yaml @@ -26,3 +26,11 @@ urls: url: "https://github.com/mlcommons/inference" - name: "MLCommons CM automation (under development)" url: "https://github.com/mlcommons/ck/tree/master/docs/mlperf/inference" + +dimensions: +- - input.model + - "MLPerf model" +- - input.implementation + - "MLPerf implementation" +- - input.framework + - "MLPerf framework" diff --git a/cm-mlops/cfg/benchmark-run-mlperf-inference-v3.1/bert-qaic-offline.yaml b/cm-mlops/cfg/benchmark-run-mlperf-inference-v3.1/bert-qaic-offline.yaml index 96d66d8bcd..2cd71103c7 100644 --- a/cm-mlops/cfg/benchmark-run-mlperf-inference-v3.1/bert-qaic-offline.yaml +++ b/cm-mlops/cfg/benchmark-run-mlperf-inference-v3.1/bert-qaic-offline.yaml @@ -8,3 +8,7 @@ input: model: bert implementation: qaic framework: qaic + +functional: true + +notes: "Notes" \ No newline at end of file diff --git a/cm-mlops/cfg/benchmark-run-mlperf-inference-v3.1/bert-reference-python-onnx-offline.yaml b/cm-mlops/cfg/benchmark-run-mlperf-inference-v3.1/bert-reference-python-onnx-offline.yaml index 7b612a2c39..a8a93eb7a0 100644 --- a/cm-mlops/cfg/benchmark-run-mlperf-inference-v3.1/bert-reference-python-onnx-offline.yaml +++ b/cm-mlops/cfg/benchmark-run-mlperf-inference-v3.1/bert-reference-python-onnx-offline.yaml @@ -9,3 +9,5 @@ input: implementation: reference framework: onnx +functional: false + diff --git a/cm-mlops/cfg/benchmark-run-mlperf-inference-v3.1/gptj-reference-python-torch-offline.yaml b/cm-mlops/cfg/benchmark-run-mlperf-inference-v3.1/gptj-reference-python-torch-offline.yaml index 5a56d16d3d..7cc38c8058 100644 --- a/cm-mlops/cfg/benchmark-run-mlperf-inference-v3.1/gptj-reference-python-torch-offline.yaml +++ b/cm-mlops/cfg/benchmark-run-mlperf-inference-v3.1/gptj-reference-python-torch-offline.yaml @@ -13,4 +13,5 @@ input: implementation: reference framework: torch +reproduced: true \ No newline at end of file diff --git a/cm-mlops/cfg/benchmark-run-mlperf-inference-v4.0/_cm.yaml b/cm-mlops/cfg/benchmark-run-mlperf-inference-v4.0/_cm.yaml index 
9fe5d48726..522730bf2a 100644 --- a/cm-mlops/cfg/benchmark-run-mlperf-inference-v4.0/_cm.yaml +++ b/cm-mlops/cfg/benchmark-run-mlperf-inference-v4.0/_cm.yaml @@ -26,3 +26,11 @@ urls: url: "https://github.com/mlcommons/inference" - name: "MLCommons CM automation (under development)" url: "https://github.com/mlcommons/ck/tree/master/docs/mlperf/inference" + +dimensions: +- - input.model + - "MLPerf model" +- - input.implementation + - "MLPerf implementation" +- - input.framework + - "MLPerf framework" diff --git a/cm-mlops/script/launch-benchmark/customize.py b/cm-mlops/script/launch-benchmark/customize.py index b7f8694723..b6f0e9f745 100644 --- a/cm-mlops/script/launch-benchmark/customize.py +++ b/cm-mlops/script/launch-benchmark/customize.py @@ -225,6 +225,32 @@ def process_base(meta, full_path): return {'return':0, 'meta':meta} + + + +################################################################################## +def get_with_complex_key(meta, key): + + j = key.find('.') + + if j<0: + return meta.get(key) + + key0 = key[:j] + + if key0 not in meta: + return None + + return get_with_complex_key(meta[key0], key[j+1:]) + +################################################################################## +def get_with_complex_key_safe(meta, key): + v = get_with_complex_key(meta, key) + + if v == None: v='' + + return v + ################################################################################## def prepare_table(i): @@ -237,30 +263,111 @@ def prepare_table(i): html = '' all_data = [] + +# dimensions = [('input.model', 'MLPerf model'), +# ('input.implementation', 'MLPerf implementation'), +# ('input.framework', 'MLPerf framework')] + + dimensions = i.get('dimensions', []) + + dimension_values = {} + dimension_keys = [] + + if len(dimensions) == 0: + keys = [('test', 'CM test', 400, 'leftAligned')] + else: + keys = [('test', 'CM test', 50, 'leftAligned')] + + for k in dimensions: + key = k[0] + + keys.append((k[0], k[1], 100, 'leftAligned')) + + dimension_values[key] = [] + dimension_keys.append(key) + +# # assemble all values +# for s in selection: +# for k in dimensions: +# key = k[0] +# +# value = get_with_complex_key(selection, key) +# +# if value!=None and value!='' and value not in dimension_values[key]: +# dimension_values.append(value) + + # If dimensions, sort by dimensions + for d in list(reversed(dimension_keys)): + selection = sorted(selection, key = lambda x: get_with_complex_key_safe(x, d)) + + + + keys += [ + ('functional', 'Functional', 80, ''), + ('reproduced', 'Reproduced', 80, ''), + ('notes', 'Notes', 200, 'leftAligned'), + ] + + j = 0 + + badges_url={'functional':'https://cTuning.org/images/artifacts_evaluated_functional_v1_1_small.png', + 'reproduced':'https://cTuning.org/images/results_reproduced_v1_1_small.png'} + + + + + for s in selection: row = {} + j += 1 + uid = s['uid'] - row['x1']=uid - row['x2']=s['name'] url = misc.make_url(uid, key='uid', action='howtorun', md=False) + + name = s.get('name','') + if name == '': name = uid + + + if len(dimensions) == 0: + row['test'] = '<a href="{}">{}</a>'.format(url, name) + else: + row['test'] = '<a href="{}">View</a>'.format(url) + for k in dimensions: + kk = k[0] + + v = get_with_complex_key_safe(s, kk) + + row[kk] = str(v) + + + + + # Check ACM/IEEE functional badge + x = '' + if s.get('functional', False): + x = '<a href="{}" target="_blank"><img src="{}"></a>'.format(url, badges_url['functional']) + row['functional'] = x + + # Check ACM/IEEE reproduced badge + x = '' + if s.get('reproduced', False): + x = '<a href="{}" target="_blank"><img src="{}"></a>
'.format(url, badges_url['reproduced']) + row['reproduced'] = x - row['x3']='See test'.format(url) + # Check misc notes + row['notes']=s.get('notes','') + # Finish row all_data.append(row) # Visualize table pd_keys = [v[0] for v in keys] pd_key_names = [v[1] for v in keys] + pd_all_data = [] for row in sorted(all_data, key=lambda row: (row.get('x1',0))): pd_row=[] @@ -272,9 +379,7 @@ def prepare_table(i): df.index+=1 - html=df.to_html(escape=False, justify='left') - - return {'return':0, 'html':html} + return {'return':0, 'df':df} @@ -532,17 +637,31 @@ def gui(i): else: ######################################################################### # View many (table) - st.markdown('---') + ii = {'selection':selection, + 'misc_module':misc} + + # Check if dimensions in the bench + dimensions = bench_meta.get('dimensions', []) + if len(dimensions)>0: + viewer_selection = ['benchmark specific', 'universal'] + + viewer = st.selectbox('Viewer:', viewer_selection, key = 'viewer') + + if viewer == 'benchmark specific': + ii['dimensions'] = dimensions + + else: + st.markdown('---') - r = prepare_table({'selection':selection, - 'misc_module':misc}) + r = prepare_table(ii) if r['return']>0: return r - html=r['html'] + df = r['df'] + html=df.to_html(escape=False, justify='left') st.write(html, unsafe_allow_html = True) - +# st.dataframe(df, unsafe_allow_html = True) From b4201b3f98d688b4e21e075d5b82662a7c17290e Mon Sep 17 00:00:00 2001 From: Grigori Fursin Date: Mon, 12 Feb 2024 10:23:14 +0100 Subject: [PATCH 07/33] started cleaning up MLPerf docs --- docs/mlperf/README.md | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/docs/mlperf/README.md b/docs/mlperf/README.md index ae7fe74753..3e6dbfe114 100644 --- a/docs/mlperf/README.md +++ b/docs/mlperf/README.md @@ -1,17 +1,21 @@ [ [Back to CM documentation](../README.md) ] -# Run MLPerf benchmarks out-of-the-box +# How to run and customize MLPerf benchmarks? -This documentation will help you run, reproduce and compare MLPerf benchmarks out-of-the-box -in a unified way across different software, hardware, models and data sets using -the the [MLCommons Collective Mind automation language (CM)](https://doi.org/10.5281/zenodo.8105339). +This documentation explains how to run, customize and extend MLPerf benchmarks +in a unified way across diverse models, data sets, software and hardware from different vendors +using [MLCommons Collective Mind automation recipes](https://access.cknowledge.org/playground/?action=scripts): -Please choose which benchmark you want to run: * [MLPerf inference benchmark](inference/README.md) * [MLPerf training benchmark](../tutorials/reproduce-mlperf-training.md) *(prototyping phase)* * [MLPerf tiny benchmark](../tutorials/reproduce-mlperf-tiny.md) *(prototyping phase)* +* MLPerf automotive *(prototyping phase)* * MLPerf mobile *(preparation phase)* +* MLPerf client *(preparation phase)* -This project is under development by the [MLCommons Task Force on Automation and Reproducibility](../taskforce.md), -[cTuning.org](https://cTuning.org) and [cKnowledge.org](https://cKnowledge.org) - don't hesitate to get in touch -via the [public Discord server](https://discord.gg/JjWNWXKxwT). 
+*Note that the [MLCommons Task Force on Automation and Reproducibility](../taskforce.md) + is preparing a [GUI](https://access.cknowledge.org/playground/?action=howtorun) + to make it easier to run, customize, reproduce and compare + MLPerf benchmarks - please stay tuned for more details!* + +Don't hesitate to get in touch via the [public Discord server](https://discord.gg/JjWNWXKxwT) if you have questions or feedback! From a9e3835ea96cc469500efa64dd9fab6d88fd6dce Mon Sep 17 00:00:00 2001 From: Grigori Fursin Date: Mon, 12 Feb 2024 10:34:43 +0100 Subject: [PATCH 08/33] added compute visualization --- cm-mlops/cfg/benchmark-run-mlperf-inference-v3.1/_cm.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cm-mlops/cfg/benchmark-run-mlperf-inference-v3.1/_cm.yaml b/cm-mlops/cfg/benchmark-run-mlperf-inference-v3.1/_cm.yaml index 5258d892d5..c174ee5aa5 100644 --- a/cm-mlops/cfg/benchmark-run-mlperf-inference-v3.1/_cm.yaml +++ b/cm-mlops/cfg/benchmark-run-mlperf-inference-v3.1/_cm.yaml @@ -34,3 +34,5 @@ dimensions: - "MLPerf implementation" - - input.framework - "MLPerf framework" +- - compute_meta.name + - "Compute" From c1d4e7eec99d5c84ba49636b7529e6c655f77bed Mon Sep 17 00:00:00 2001 From: Grigori Fursin Date: Mon, 12 Feb 2024 11:00:39 +0100 Subject: [PATCH 09/33] cleaned platform/playground docs --- cm-mlops/automation/list_of_scripts.md | 37 --------------- platform/README.md | 54 ++++++---------------- platform/get-started.md | 38 --------------- platform/register.md | 12 +++-- platform/register2.md | 59 ------------------------ platform/scripts/1-install-deps-cloud.sh | 0 platform/scripts/1-install-deps.sh | 0 platform/scripts/2-run-in-cloud-nohup.sh | 0 platform/scripts/2-run-in-cloud.sh | 0 platform/scripts/restart_apache2.sh | 0 10 files changed, 21 insertions(+), 179 deletions(-) delete mode 100644 cm-mlops/automation/list_of_scripts.md delete mode 100644 platform/get-started.md delete mode 100644 platform/register2.md mode change 100755 => 100644 platform/scripts/1-install-deps-cloud.sh mode change 100755 => 100644 platform/scripts/1-install-deps.sh mode change 100755 => 100644 platform/scripts/2-run-in-cloud-nohup.sh mode change 100755 => 100644 platform/scripts/2-run-in-cloud.sh mode change 100755 => 100644 platform/scripts/restart_apache2.sh diff --git a/cm-mlops/automation/list_of_scripts.md b/cm-mlops/automation/list_of_scripts.md deleted file mode 100644 index ac822e1db9..0000000000 --- a/cm-mlops/automation/list_of_scripts.md +++ /dev/null @@ -1,37 +0,0 @@ -[ [Back to index](README.md) ] - - - -This is an automatically generated list of reusable CM scripts being developed -by the [open taskforce on automation and reproducibility](https://github.com/mlcommons/ck/issues/536) -to make MLOps and DevOps tools more interoperable, portable, deterministic and reproducible. -These scripts suppport the community effort to modularize ML Systems and automate their bechmarking, optimization, -design space exploration and deployment across continuously changing software and hardware. - -# List of CM scripts by categories - -
-<details> -<summary>Click here to see the table of contents.</summary> - -* [Platform information](#platform-information) - -</details> - -### Platform information - -* [detect-os](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/detect-os) - - -# List of all sorted CM scripts - -* [detect-os](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/detect-os) - - - - -# Maintainers - -* [Open MLCommons taskforce on automation and reproducibility](https://github.com/mlcommons/ck/blob/master/docs/taskforce.md) diff --git a/platform/README.md b/platform/README.md index 59cfc9ec3f..0725ac3a5e 100644 --- a/platform/README.md +++ b/platform/README.md @@ -1,49 +1,32 @@ # Collective Knowledge Playground -*Note that this project is under heavy development. - We are preparing a major update based on very useful feedback from our users during MLPerf inference 3.1 community submission!* +*This project is under heavy development led by the [MLCommons Task Force on Automation and Reproducibility](../docs/taskforce.md), + [cTuning.org](https://cTuning.org) and [cKnowledge.org](https://cKnowledge.org) - please join the [public Discord server](https://discord.gg/JjWNWXKxwT) to discuss this project!* + + ### Introduction -The [Collective Knowledge Playground (CK)](https://x.cknowledge.org) is a free, open-source, and technology-agnostic on-prem platform -being developed by the [MLCommons task force on automation and reproducibility](https://cKnowledge.org/mlcommons-taskforce). -It is intended to connect academia and industry to benchmark, optimize and compare AI, ML and other emerging applications +The [Collective Knowledge Playground (CK)](https://access.cknowledge.org) is a free, open-source, and technology-agnostic on-prem platform +being developed by the [MLCommons task force on automation and reproducibility](https://cKnowledge.org/mlcommons-taskforce) +to benchmark, optimize and compare AI, ML and other emerging applications across diverse and rapidly evolving models, software, hardware and data from different vendors in terms of costs, performance, power consumption, accuracy, size and other metrics in a unified, collaborative, automated, and reproducible way. -This platform is powered by the portable and technology-agnostic [Collective Mind scripting language (MLCommons CM)]( https://github.com/mlcommons/ck/tree/master/cmind ) -with [portable and reusable CM scripts](https://github.com/mlcommons/ck/tree/master/cm-mlops/script) -developed by the community to solve the "AI/ML dependency hell". CM scripts help to automatically connect -diverse and continuously changing models, software, hardware, data sets, best practices and optimization techniques -into end-to-end applications in a transparent and non-intrusive way. - -We thank [the community](https://access.cknowledge.org/playground/?action=contributors) -for helping us to validate a prototype of the MLCommons CK playground by running and reproducing -[MLPerf inference v3.0 benchmarks](https://access.cknowledge.org/playground/?action=experiments&tags=mlperf-inference,v3.0,community-submission,open,edge,image-classification,singlestream): -CK has helped to automatically interconnect very diverse technology from Neural Magic, Qualcomm, Krai, cKnowledge, OctoML, Deelvin, DELL, HPE, Lenovo, Hugging Face, Nvidia and Apple -and run it across diverse CPUs, GPUs and DSPs with PyTorch, -ONNX, QAIC, TF/TFLite, TVM and TensorRT using popular cloud providers (GCP, AWS, Azure) and individual servers and edge devices -via our recent [open optimization challenge](https://access.cknowledge.org/playground/?action=challenges&name=optimize-mlperf-inference-v3.0-2023).
+This platform is powered by the [Collective Mind automation framework (MLCommons CM)](https://github.com/mlcommons/ck) +with [portable, reusable and technology-agnostic automation recipes (CM scripts)](https://access.cknowledge.org/playground/?action=scripts) +developed by the [community](https://access.cknowledge.org/playground/?action=contributors) to solve the "AI/ML dependency hell". ### Public GUI -* [Platform preview](https://x.cKnowledge.org) +* [Platform preview](https://access.cKnowledge.org) * [GUI to run MLPerf inference benchmarks](http://cknowledge.org/mlperf-inference-gui) * [GUI to prepare MLPerf inference submissions](https://cknowledge.org/mlperf-inference-submission-gui) -### Collaborative development -This open-source technology is being developed by the -[MLCommons task force on automation and reproducibility](https://github.com/mlcommons/ck/blob/master/docs/taskforce.md) -led by [Grigori Fursin](https://cKnowledge.org/gfursin) and -[Arjun Suresh](https://www.linkedin.com/in/arjunsuresh): -* Join our [public Discord server](https://discord.gg/JjWNWXKxwT). -* Join our [public conf-calls](https://docs.google.com/document/d/1zMNK1m_LhWm6jimZK6YE05hu4VH9usdbKJ3nBy-ZPAw). -* Check our [news](docs/news.md). -* Check our [presentation](https://doi.org/10.5281/zenodo.7871070) with development plans. -* Read about our [CK concept (previous version before MLCommons)](https://doi.org/10.1098/rsta.2020.0211). +### Collaborative development #### Source code for on-prem use @@ -55,11 +38,12 @@ using the MLCommons CM scripting language. Discuss your challenge in Discord, add your challenge [here](https://github.com/mlcommons/ck/tree/master/cm-mlops/challenge) and create a PR. + #### Private challenges You can use this platform to organize private challenges between your internal teams and external partners. -Install the MLCommons CK2 (CM) framework as described [here](https://github.com/mlcommons/ck/blob/master/docs/installation.md). +Install the MLCommons CM framework as described [here](https://github.com/mlcommons/ck/blob/master/docs/installation.md). Pull CM repository with portable MLOps automation recipes from the community: ```bash @@ -76,16 +60,6 @@ as a public or private server to run optimization experiments with your colleagues, external teams and users. -### Copyright - -2021-2023 [MLCommons](https://mlcommons.org) - ### License [Apache 2.0](LICENSE.md) - -### Acknowledgments - -This project is currently supported by [MLCommons](https://mlcommons.org), [cTuning foundation](https://cTuning.org), -[cKnowledge](https://cKnowledge.org) and [individual contributors](https://github.com/mlcommons/ck/blob/master/CONTRIBUTING.md). -We thank [HiPEAC](https://hipeac.net) and [OctoML](https://octoml.ai) for sponsoring initial development. diff --git a/platform/get-started.md b/platform/get-started.md deleted file mode 100644 index e02b7a66be..0000000000 --- a/platform/get-started.md +++ /dev/null @@ -1,38 +0,0 @@ -# Getting Started Guide - -## Reproducing and improving MLPerf inference results - -The [Collective Knowledge platform](https://access.cKnowledge.org) -is currently having experiment results from MLPerf Inference v2.0, v2.1 and v3.0 -in the [extensible CM format](https://github.com/mlcommons/ck_mlperf_results) -and with the possibility to add derived metrics such as power efficiency. 
-
-We are currently preparing the [optimization challenge for MLPerf Inference v3.1](https://github.com/ctuning/mlcommons-ck/blob/master/cm-mlops/challenge/optimize-mlperf-inference-v3.1-2023/README.md).
-
-For MLPerf inference 3.1 we have the following benchmark tasks
-1. [Image Classification](https://github.com/mlcommons/ck/blob/master/cm-mlops/challenge/optimize-mlperf-inference-v3.1-2023/docs/generate-resnet50-submission.md) using ResNet50 model and Imagenet-2012 dataset
-2. [Object Detection](https://github.com/mlcommons/ck/blob/master/cm-mlops/challenge/optimize-mlperf-inference-v3.1-2023/docs/generate-retinanet-submission.md) using Retinanet model and OpenImages dataset
-3. [Language processing](https://github.com/mlcommons/ck/blob/master/cm-mlops/challenge/optimize-mlperf-inference-v3.1-2023/docs/generate-bert-submission.md) using Bert-Large model and Squadv1.1 dataset
-4. [Speech Recognition](https://github.com/mlcommons/ck/blob/master/cm-mlops/challenge/optimize-mlperf-inference-v3.1-2023/docs/generate-rnnt-submission.md) using RNNT model and LibriSpeech dataset
-5. [Medical Imaging](https://github.com/mlcommons/ck/blob/master/cm-mlops/challenge/optimize-mlperf-inference-v3.1-2023/docs/generate-3d-unet-submission.md) using 3d-unet model and KiTS19 dataset
-6. Recommendation using DLRM model and Criteo dataset
-7. Large Language Model (Pending)
-
-The tasks are divided into
-1. Edge (SingleStream, MultiStream and Offline scenarios) and
-2. Datacenter (Offline and Server scenarios) categories.
-
-Results can be submitted under
-1. closed (requires compliance runs, strict accuracy requirement, no retraining and subject to audit) and
-2. open divisions (only dataset is fixed).
-
-Results can be just performance or performance with power.
-
-## Participating in other optimization challenges
-
-Check our on-going optimization challenges [here](https://access.cknowledge.org/playground/?action=challenges)
-and join our [public Discord server](https://access.cknowledge.org/playground/?action=challenges) to discuss them.
-
-## Further reading
-
-* [Project documentation](../docs/README.md)
diff --git a/platform/register.md b/platform/register.md
index 323e8352e8..4f2a15f4c8 100644
--- a/platform/register.md
+++ b/platform/register.md
@@ -1,8 +1,10 @@
-# Register for Collective Knowledge challenges
+# Register for benchmarking and optimization challenges
 
 Please join the [public Discord server](https://discord.gg/JjWNWXKxwT)
-from the [MLCommons Task Force on Automation and Reproducibility](../docs/taskforce.md)
-and send your name, organization and URL to @gfursin and @arjunsuresh
-(task force co-chairs and organizers of open challenges).
+to tell the [MLCommons Task Force on Automation and Reproducibility](../docs/taskforce.md)
+about your interest in participating in our benchmarking and optimization challenges.
+
+*We plan to add a registration GUI to the [MLCommons Collective Knowledge playground](https://access.cKnowledge.org)
+ in the future - please stay tuned for more details!*
+
-In the future, we plan to add a registration GUI to our [MLCommons Collective Knowledge playground](https://access.cKnowledge.org).
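The registration docs above rely on the CM command line; the same repository pull can also be scripted through the `cmind` Python package. A minimal sketch follows - the `access` call pattern mirrors the examples used elsewhere in this repository, but the exact input keys for the `repo` automation are an assumption, so treat this as an illustration rather than an API reference:

```python
import cmind

# Programmatic equivalent of `cm pull repo mlcommons@ck` (illustrative sketch)
r = cmind.access({'action': 'pull',
                  'automation': 'repo',
                  'artifact': 'mlcommons@ck'})

# CM reports errors via the returned dictionary instead of raising exceptions
if r['return'] > 0:
    print(r['error'])
```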
diff --git a/platform/register2.md b/platform/register2.md deleted file mode 100644 index 1d9a9fde01..0000000000 --- a/platform/register2.md +++ /dev/null @@ -1,59 +0,0 @@ -# Register for Collective Knowledge challenges - -Since the [MLCommons CK playground](https://access.cKnowledge.org) -is still in the heavy development stage, the registration is not yet automated via CK GUI. - -You can simply add add your name, organization and URL in this [GitHub ticket](https://github.com/mlcommons/ck/issues/855). - -You name will be added to the [CK leaderboard](https://access.cknowledge.org/playground) -with 1 point after your PR is accepted (to support your intent to participate in our collaborative effort). - -You can add yourself to this [GitHub repository](https://github.com/mlcommons/ck/tree/master/cm-mlops/contributor) -using our [CM automation language](https://doi.org/10.5281/zenodo.8105339) from the command line as follows. - -Install [CM](../docs/installation.md) on your system. - -Fork https://github.com/mlcommons/ck . - -Pull it via CM as follows: - -```bash -cm pull repo --url={URL of the fork of github.com/mlcommons/ck} -``` - -Note that if you already have `mlcommons@ck` repository installed via CM, -you need to delete it and then install your fork: -```bash -cm rm repo mlcommons@ck --all -cm pull repo --url={URL of the fork of github.com/mlcommons/ck} -``` -Create a new contributor with your name: -```bash -cm add contributor "your name" -``` - -CM will ask you a few questions and will create a new CM contributor entry with your name. - -You can commit this entry to your fork and create a PR to https://github.com/mlcommons/ck . - -*Note that you will need to sign MLCommons CLA to contribute to MLCommons projects - it may take a few days to approve it by MLCommons*. - -Note that you will need CM and your fork of https://github.com/mlcommons/ck to participate in challenges, -so please keep and use it. - -Happy hacking! - -## Discussions - -You can now join the [public Discord server](https://discord.gg/JjWNWXKxwT) -from the [MLCommons Task Force on Automation and Reproducibility](../docs/taskforce.md) -to ask any questions, provide feedback and discuss challenges! - -## Our mission - -You can learn more about our mission [here](https://doi.org/10.5281/zenodo.8105339). 
- -## Organizers - -* [Grigori Fursin](https://cKnowledge.org/gfursin) and [Arjun Suresh](https://www.linkedin.com/in/arjunsuresh) - ([MLCommons](https://mlcommons.org), [cTuning.org](https://cTuning.org) and [cKnowledge.org](https://cKnowledge.org)) diff --git a/platform/scripts/1-install-deps-cloud.sh b/platform/scripts/1-install-deps-cloud.sh old mode 100755 new mode 100644 diff --git a/platform/scripts/1-install-deps.sh b/platform/scripts/1-install-deps.sh old mode 100755 new mode 100644 diff --git a/platform/scripts/2-run-in-cloud-nohup.sh b/platform/scripts/2-run-in-cloud-nohup.sh old mode 100755 new mode 100644 diff --git a/platform/scripts/2-run-in-cloud.sh b/platform/scripts/2-run-in-cloud.sh old mode 100755 new mode 100644 diff --git a/platform/scripts/restart_apache2.sh b/platform/scripts/restart_apache2.sh old mode 100755 new mode 100644 From bc4984422ffc6d60432be26ae2979b8061969f90 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Mon, 12 Feb 2024 16:17:02 +0530 Subject: [PATCH 10/33] Fixes for intel gptj --- .../reproduce-mlperf-inference-intel/build_gptj_harness.sh | 4 +++- .../reproduce-mlperf-inference-intel/run_gptj_harness.sh | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/cm-mlops/script/reproduce-mlperf-inference-intel/build_gptj_harness.sh b/cm-mlops/script/reproduce-mlperf-inference-intel/build_gptj_harness.sh index 2219eed64b..31eae300cb 100644 --- a/cm-mlops/script/reproduce-mlperf-inference-intel/build_gptj_harness.sh +++ b/cm-mlops/script/reproduce-mlperf-inference-intel/build_gptj_harness.sh @@ -21,6 +21,9 @@ mkdir -p ${WORKLOAD_DATA}/model export INT8_MODEL_DIR=${WORKLOAD_DATA}/gpt-j-int8-model export INT4_MODEL_DIR=${WORKLOAD_DATA}/gpt-j-int4-model +if [[ -f ${INT8_MODEL_DIR}/best_model.pt ]]; then + exit 0 +fi python download-calibration-dataset.py --calibration-list-file calibration-list.txt --output-dir ${WORKLOAD_DATA}/calibration-data python download-dataset.py --split validation --output-dir ${WORKLOAD_DATA}/validation-data @@ -30,5 +33,4 @@ export VALIDATION_DATA_JSON=${WORKLOAD_DATA}/validation-data/cnn_dailymail_valid export INT4_CALIBRATION_DIR=${WORKLOAD_DATA}/quantized-int4-model #sudo -E bash run_quantization.sh bash run_quantization.sh -exit 1 test $? -eq 0 || exit $? 
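The two changes above make the harness build re-runnable: the new guard skips the expensive quantization step when `best_model.pt` already exists, and `test $? -eq 0 || exit $?` propagates the real exit status of `run_quantization.sh` instead of the previous unconditional `exit 1`. A rough Python rendering of the same pattern, purely illustrative (the paths and script name are taken from the hunk above):

```python
import os
import subprocess
import sys

# Assumes WORKLOAD_DATA is set, as in build_gptj_harness.sh
workload_data = os.environ["WORKLOAD_DATA"]
int8_model = os.path.join(workload_data, "gpt-j-int8-model", "best_model.pt")

# Idempotency guard: skip quantization if the model was already built
if os.path.isfile(int8_model):
    sys.exit(0)

# Propagate the real exit status of the quantization step
ret = subprocess.run(["bash", "run_quantization.sh"]).returncode
sys.exit(ret)
```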
diff --git a/cm-mlops/script/reproduce-mlperf-inference-intel/run_gptj_harness.sh b/cm-mlops/script/reproduce-mlperf-inference-intel/run_gptj_harness.sh index 2b2c733476..07585c015b 100755 --- a/cm-mlops/script/reproduce-mlperf-inference-intel/run_gptj_harness.sh +++ b/cm-mlops/script/reproduce-mlperf-inference-intel/run_gptj_harness.sh @@ -1,7 +1,7 @@ #!/bin/bash export PATH=${CM_CONDA_BIN_PATH}:$PATH -export KMP_BLOCKTIME=4 +export KMP_BLOCKTIME=10 export KMP_AFFINITY=granularity=fine,compact,1,0 export LD_PRELOAD=${LD_PRELOAD}:${CONDA_PREFIX}/lib/libiomp5.so export LD_PRELOAD=${LD_PRELOAD}:${CONDA_PREFIX}/lib/libtcmalloc.so From 8e847212ef84a799e04e020b39389ee8493d20a1 Mon Sep 17 00:00:00 2001 From: Grigori Fursin Date: Mon, 12 Feb 2024 14:42:30 +0100 Subject: [PATCH 11/33] fixed get openimages on Windows: https://github.com/mlcommons/ck/issues/1104 --- cm-mlops/script/get-dataset-openimages/run.bat | 4 +++- .../get-preprocessed-dataset-openimages/preprocess.py | 1 + .../script/get-preprocessed-dataset-openimages/run.bat | 1 + .../script/test-mlperf-inference-retinanet-win/run.bat | 2 +- cm-mlops/script/test-mlperf-inference-retinanet-win/run.sh | 7 +++++++ 5 files changed, 13 insertions(+), 2 deletions(-) create mode 100644 cm-mlops/script/get-preprocessed-dataset-openimages/run.bat create mode 100644 cm-mlops/script/test-mlperf-inference-retinanet-win/run.sh diff --git a/cm-mlops/script/get-dataset-openimages/run.bat b/cm-mlops/script/get-dataset-openimages/run.bat index 3b1b6a15e8..742542d251 100644 --- a/cm-mlops/script/get-dataset-openimages/run.bat +++ b/cm-mlops/script/get-dataset-openimages/run.bat @@ -18,5 +18,7 @@ if not "%CM_DATASET_SIZE%" == "" ( %CM_PYTHON_BIN% tools\openimages.py %MAX_IMAGES% --dataset-dir=%INSTALL_DIR% --output-labels=openimages-mlperf.json --classes %CM_DATASET_OPENIMAGES_CLASSES% IF %ERRORLEVEL% NEQ 0 EXIT %ERRORLEVEL% +rem Next is a hack to support MLPerf inference on Windows cd %INSTALL_DIR% -move validation\data\* . 
\ No newline at end of file +if not exist validation\data\annotations mkdir validation\data\annotations +copy annotations\* validation\data\annotations diff --git a/cm-mlops/script/get-preprocessed-dataset-openimages/preprocess.py b/cm-mlops/script/get-preprocessed-dataset-openimages/preprocess.py index 71f7554b32..b2b05fe1dc 100644 --- a/cm-mlops/script/get-preprocessed-dataset-openimages/preprocess.py +++ b/cm-mlops/script/get-preprocessed-dataset-openimages/preprocess.py @@ -1,6 +1,7 @@ import os import sys import os.path + mlperf_src_path = os.environ['CM_MLPERF_INFERENCE_CLASSIFICATION_AND_DETECTION_PATH'] python_path = os.path.join(mlperf_src_path, "python") sys.path.insert(0, python_path) diff --git a/cm-mlops/script/get-preprocessed-dataset-openimages/run.bat b/cm-mlops/script/get-preprocessed-dataset-openimages/run.bat new file mode 100644 index 0000000000..f3ccd2da7b --- /dev/null +++ b/cm-mlops/script/get-preprocessed-dataset-openimages/run.bat @@ -0,0 +1 @@ +%CM_PYTHON_BIN_WITH_PATH% %CM_TMP_CURRENT_SCRIPT_PATH%\preprocess.py diff --git a/cm-mlops/script/test-mlperf-inference-retinanet-win/run.bat b/cm-mlops/script/test-mlperf-inference-retinanet-win/run.bat index 0fd4284329..38970bc0ef 100644 --- a/cm-mlops/script/test-mlperf-inference-retinanet-win/run.bat +++ b/cm-mlops/script/test-mlperf-inference-retinanet-win/run.bat @@ -5,4 +5,4 @@ set SCRIPT_DIR=%CM_TMP_CURRENT_SCRIPT_PATH% cd %CM_MLPERF_INFERENCE_CLASSIFICATION_AND_DETECTION_PATH% -%CM_PYTHON_BIN% python/main.py --profile retinanet-onnxruntime --scenario Offline --model %CM_ML_MODEL_FILE_WITH_PATH% --dataset-path %CM_DATASET_PATH_ROOT% --accuracy +%CM_PYTHON_BIN_WITH_PATH% python/main.py --profile retinanet-onnxruntime --scenario Offline --model %CM_ML_MODEL_FILE_WITH_PATH% --dataset-path %CM_DATASET_PATH_ROOT%\validation\data --accuracy diff --git a/cm-mlops/script/test-mlperf-inference-retinanet-win/run.sh b/cm-mlops/script/test-mlperf-inference-retinanet-win/run.sh new file mode 100644 index 0000000000..0bc3d3a8d3 --- /dev/null +++ b/cm-mlops/script/test-mlperf-inference-retinanet-win/run.sh @@ -0,0 +1,7 @@ +#!/bin/bash + +echo "" + +cd ${CM_MLPERF_INFERENCE_CLASSIFICATION_AND_DETECTION_PATH} + +${CM_PYTHON_BIN_WITH_PATH} python/main.py --profile retinanet-onnxruntime --scenario Offline --model ${CM_ML_MODEL_FILE_WITH_PATH} --dataset-path ${CM_DATASET_PATH_ROOT} --accuracy From 9c09bc8e6f6f6eb055c1d82a3350e4352a4f419c Mon Sep 17 00:00:00 2001 From: Grigori Fursin Date: Mon, 12 Feb 2024 14:41:12 +0100 Subject: [PATCH 12/33] fixed mlperf retinanet example --- .../test-mlperf-inference-retinanet/README.md | 146 ++++++++++++++++++ .../test-mlperf-inference-retinanet/_cm.json | 49 ++++++ .../customize.py | 18 +++ .../test-mlperf-inference-retinanet/run.bat | 8 + .../test-mlperf-inference-retinanet/run.sh | 9 ++ 5 files changed, 230 insertions(+) create mode 100644 cm-mlops/script/test-mlperf-inference-retinanet/README.md create mode 100644 cm-mlops/script/test-mlperf-inference-retinanet/_cm.json create mode 100644 cm-mlops/script/test-mlperf-inference-retinanet/customize.py create mode 100644 cm-mlops/script/test-mlperf-inference-retinanet/run.bat create mode 100644 cm-mlops/script/test-mlperf-inference-retinanet/run.sh diff --git a/cm-mlops/script/test-mlperf-inference-retinanet/README.md b/cm-mlops/script/test-mlperf-inference-retinanet/README.md new file mode 100644 index 0000000000..e9531fa189 --- /dev/null +++ b/cm-mlops/script/test-mlperf-inference-retinanet/README.md @@ -0,0 +1,146 @@ +
+Click here to see the table of contents. + +* [About](#about) +* [Summary](#summary) +* [Reuse this script in your project](#reuse-this-script-in-your-project) + * [ Install CM automation language](#install-cm-automation-language) + * [ Check CM script flags](#check-cm-script-flags) + * [ Run this script from command line](#run-this-script-from-command-line) + * [ Run this script from Python](#run-this-script-from-python) + * [ Run this script via GUI](#run-this-script-via-gui) + * [ Run this script via Docker (beta)](#run-this-script-via-docker-(beta)) +* [Customization](#customization) + * [ Default environment](#default-environment) +* [Script workflow, dependencies and native scripts](#script-workflow-dependencies-and-native-scripts) +* [Script output](#script-output) +* [New environment keys (filter)](#new-environment-keys-(filter)) +* [New environment keys auto-detected from customize](#new-environment-keys-auto-detected-from-customize) +* [Maintainers](#maintainers) + +
+

*Note that this README is automatically generated - don't edit!*

### About

#### Summary

* Category: *CM interface prototyping.*
* CM GitHub repository: *[mlcommons@ck](https://github.com/mlcommons/ck/tree/master/cm-mlops)*
* GitHub directory for this script: *[GitHub](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/test-mlperf-inference-retinanet)*
* CM meta description for this script: *[_cm.json](_cm.json)*
* CM "database" tags to find this script: *test,mlperf-inference-win,retinanet,windows*
* Output cached? *False*
___
### Reuse this script in your project

#### Install CM automation language

* [Installation guide](https://github.com/mlcommons/ck/blob/master/docs/installation.md)
* [CM intro](https://doi.org/10.5281/zenodo.8105339)

#### Pull CM repository with this automation

```cm pull repo mlcommons@ck```


#### Run this script from command line

1. `cm run script --tags=test,mlperf-inference-win,retinanet,windows `

2. `cmr "test mlperf-inference-win retinanet windows" `

#### Run this script from Python
+
Click here to expand this section.

```python

import cmind

r = cmind.access({'action':'run',
               'automation':'script',
               'tags':'test,mlperf-inference-win,retinanet,windows',
               'out':'con',
               ...
               (other input keys for this script)
               ...
              })

if r['return']>0:
    print (r['error'])

```
+ + +#### Run this script via GUI + +```cmr "cm gui" --script="test,mlperf-inference-win,retinanet,windows"``` + +Use this [online GUI](https://cKnowledge.org/cm-gui/?tags=test,mlperf-inference-win,retinanet,windows) to generate CM CMD. + +#### Run this script via Docker (beta) + +`cm docker script "test mlperf-inference-win retinanet windows" ` + +___ +### Customization + +#### Default environment + +
+Click here to expand this section. + +These keys can be updated via `--env.KEY=VALUE` or `env` dictionary in `@input.json` or using script flags. + + +
+ +___ +### Script workflow, dependencies and native scripts + +
+
Click here to expand this section.

 1. ***Read "deps" on other CM scripts from [meta](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/test-mlperf-inference-retinanet/_cm.json)***
   * get,sys-utils-cm
     - CM script: [get-sys-utils-cm](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/get-sys-utils-cm)
   * get,python3
     * CM names: `--adr.['python', 'python3']...`
     - CM script: [get-python3](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/get-python3)
   * get,generic-python-lib,_requests
     - CM script: [get-generic-python-lib](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/get-generic-python-lib)
   * get,loadgen
     * CM names: `--adr.['loadgen', 'mlperf-inference-loadgen']...`
     - CM script: [get-mlperf-inference-loadgen](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/get-mlperf-inference-loadgen)
   * mlperf,inference,source
     * CM names: `--adr.['inference-src']...`
     - CM script: [get-mlperf-inference-src](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/get-mlperf-inference-src)
   * get,dataset,open-images,original
     - CM script: [get-dataset-openimages](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/get-dataset-openimages)
   * get,raw,ml-model,retinanet
     - CM script: [get-ml-model-retinanet](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/get-ml-model-retinanet)
 1. ***Run "preprocess" function from [customize.py](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/test-mlperf-inference-retinanet/customize.py)***
 1. Read "prehook_deps" on other CM scripts from [meta](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/test-mlperf-inference-retinanet/_cm.json)
 1. ***Run native script if exists***
   * [run.bat](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/test-mlperf-inference-retinanet/run.bat)
 1. Read "posthook_deps" on other CM scripts from [meta](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/test-mlperf-inference-retinanet/_cm.json)
 1. ***Run "postprocess" function from [customize.py](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/test-mlperf-inference-retinanet/customize.py)***
 1. Read "post_deps" on other CM scripts from [meta](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/test-mlperf-inference-retinanet/_cm.json)
+ +___ +### Script output +`cmr "test mlperf-inference-win retinanet windows" -j` +#### New environment keys (filter) + +#### New environment keys auto-detected from customize + +___ +### Maintainers + +* [Open MLCommons taskforce on automation and reproducibility](https://github.com/mlcommons/ck/blob/master/docs/taskforce.md) \ No newline at end of file diff --git a/cm-mlops/script/test-mlperf-inference-retinanet/_cm.json b/cm-mlops/script/test-mlperf-inference-retinanet/_cm.json new file mode 100644 index 0000000000..fb8be75934 --- /dev/null +++ b/cm-mlops/script/test-mlperf-inference-retinanet/_cm.json @@ -0,0 +1,49 @@ +{ + "alias": "test-mlperf-inference-retinanet", + "automation_alias": "script", + "automation_uid": "5b4e0237da074764", + "category": "CM interface prototyping", + "deps": [ + { + "tags": "get,sys-utils-cm" + }, + { + "names": [ + "python", + "python3" + ], + "tags": "get,python3" + }, + { + "tags": "get,generic-python-lib,_requests" + }, + { + "names": [ + "loadgen", "mlperf-inference-loadgen" + ], + "tags": "get,loadgen" + }, + { + "force_env_keys": [ + "CM_GIT_*" + ], + "names": [ + "inference-src" + ], + "tags": "mlperf,inference,source" + }, + { + "tags": "get,dataset,open-images,original" + }, + { + "tags": "get,raw,ml-model,retinanet" + } + ], + "tags": [ + "test", + "mlperf-inference-win", + "retinanet", + "windows" + ], + "uid": "1cedbc3b642a403a" +} diff --git a/cm-mlops/script/test-mlperf-inference-retinanet/customize.py b/cm-mlops/script/test-mlperf-inference-retinanet/customize.py new file mode 100644 index 0000000000..14e20d1bf2 --- /dev/null +++ b/cm-mlops/script/test-mlperf-inference-retinanet/customize.py @@ -0,0 +1,18 @@ +from cmind import utils +import os + +def preprocess(i): + + os_info = i['os_info'] + env = i['env'] + meta = i['meta'] + + return {'return':0} + + +def postprocess(i): + + env = i['env'] + state = i['state'] + + return {'return':0} diff --git a/cm-mlops/script/test-mlperf-inference-retinanet/run.bat b/cm-mlops/script/test-mlperf-inference-retinanet/run.bat new file mode 100644 index 0000000000..38970bc0ef --- /dev/null +++ b/cm-mlops/script/test-mlperf-inference-retinanet/run.bat @@ -0,0 +1,8 @@ +echo. 
+ +set CUR_DIR=%cd% +set SCRIPT_DIR=%CM_TMP_CURRENT_SCRIPT_PATH% + +cd %CM_MLPERF_INFERENCE_CLASSIFICATION_AND_DETECTION_PATH% + +%CM_PYTHON_BIN_WITH_PATH% python/main.py --profile retinanet-onnxruntime --scenario Offline --model %CM_ML_MODEL_FILE_WITH_PATH% --dataset-path %CM_DATASET_PATH_ROOT%\validation\data --accuracy diff --git a/cm-mlops/script/test-mlperf-inference-retinanet/run.sh b/cm-mlops/script/test-mlperf-inference-retinanet/run.sh new file mode 100644 index 0000000000..b437374079 --- /dev/null +++ b/cm-mlops/script/test-mlperf-inference-retinanet/run.sh @@ -0,0 +1,9 @@ +#!/bin/bash + +echo "" + +cd ${CM_MLPERF_INFERENCE_CLASSIFICATION_AND_DETECTION_PATH} + +ln -s ${CM_DATASET_PATH_ROOT}/annotations ${CM_DATASET_PATH_ROOT}/validation/data/annotations + +${CM_PYTHON_BIN_WITH_PATH} python/main.py --profile retinanet-onnxruntime --scenario Offline --model ${CM_ML_MODEL_FILE_WITH_PATH} --dataset-path ${CM_DATASET_PATH_ROOT}/validation/data --accuracy From 7b1ef52d98d4af8a0651433b87007df7b9ed1b96 Mon Sep 17 00:00:00 2001 From: Grigori Fursin Date: Mon, 12 Feb 2024 14:56:50 +0100 Subject: [PATCH 13/33] clean up --- .../README.md | 146 ------------------ .../_cm.json | 49 ------ .../customize.py | 18 --- .../run.bat | 8 - .../run.sh | 7 - 5 files changed, 228 deletions(-) delete mode 100644 cm-mlops/script/test-mlperf-inference-retinanet-win/README.md delete mode 100644 cm-mlops/script/test-mlperf-inference-retinanet-win/_cm.json delete mode 100644 cm-mlops/script/test-mlperf-inference-retinanet-win/customize.py delete mode 100644 cm-mlops/script/test-mlperf-inference-retinanet-win/run.bat delete mode 100644 cm-mlops/script/test-mlperf-inference-retinanet-win/run.sh diff --git a/cm-mlops/script/test-mlperf-inference-retinanet-win/README.md b/cm-mlops/script/test-mlperf-inference-retinanet-win/README.md deleted file mode 100644 index e9531fa189..0000000000 --- a/cm-mlops/script/test-mlperf-inference-retinanet-win/README.md +++ /dev/null @@ -1,146 +0,0 @@ -
-Click here to see the table of contents. - -* [About](#about) -* [Summary](#summary) -* [Reuse this script in your project](#reuse-this-script-in-your-project) - * [ Install CM automation language](#install-cm-automation-language) - * [ Check CM script flags](#check-cm-script-flags) - * [ Run this script from command line](#run-this-script-from-command-line) - * [ Run this script from Python](#run-this-script-from-python) - * [ Run this script via GUI](#run-this-script-via-gui) - * [ Run this script via Docker (beta)](#run-this-script-via-docker-(beta)) -* [Customization](#customization) - * [ Default environment](#default-environment) -* [Script workflow, dependencies and native scripts](#script-workflow-dependencies-and-native-scripts) -* [Script output](#script-output) -* [New environment keys (filter)](#new-environment-keys-(filter)) -* [New environment keys auto-detected from customize](#new-environment-keys-auto-detected-from-customize) -* [Maintainers](#maintainers) - -
- -*Note that this README is automatically generated - don't edit!* - -### About - -#### Summary - -* Category: *CM interface prototyping.* -* CM GitHub repository: *[mlcommons@ck](https://github.com/mlcommons/ck/tree/master/cm-mlops)* -* GitHub directory for this script: *[GitHub](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/test-mlperf-inference-retinanet-win)* -* CM meta description for this script: *[_cm.json](_cm.json)* -* CM "database" tags to find this script: *test,mlperf-inference-win,retinanet,windows* -* Output cached? *False* -___ -### Reuse this script in your project - -#### Install CM automation language - -* [Installation guide](https://github.com/mlcommons/ck/blob/master/docs/installation.md) -* [CM intro](https://doi.org/10.5281/zenodo.8105339) - -#### Pull CM repository with this automation - -```cm pull repo mlcommons@ck``` - - -#### Run this script from command line - -1. `cm run script --tags=test,mlperf-inference-win,retinanet,windows ` - -2. `cmr "test mlperf-inference-win retinanet windows" ` - -#### Run this script from Python - -
-Click here to expand this section. - -```python - -import cmind - -r = cmind.access({'action':'run' - 'automation':'script', - 'tags':'test,mlperf-inference-win,retinanet,windows' - 'out':'con', - ... - (other input keys for this script) - ... - }) - -if r['return']>0: - print (r['error']) - -``` - -
- - -#### Run this script via GUI - -```cmr "cm gui" --script="test,mlperf-inference-win,retinanet,windows"``` - -Use this [online GUI](https://cKnowledge.org/cm-gui/?tags=test,mlperf-inference-win,retinanet,windows) to generate CM CMD. - -#### Run this script via Docker (beta) - -`cm docker script "test mlperf-inference-win retinanet windows" ` - -___ -### Customization - -#### Default environment - -
-Click here to expand this section. - -These keys can be updated via `--env.KEY=VALUE` or `env` dictionary in `@input.json` or using script flags. - - -
- -___ -### Script workflow, dependencies and native scripts - -
-Click here to expand this section. - - 1. ***Read "deps" on other CM scripts from [meta](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/test-mlperf-inference-retinanet-win/_cm.json)*** - * get,sys-utils-cm - - CM script: [get-sys-utils-cm](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/get-sys-utils-cm) - * get,python3 - * CM names: `--adr.['python', 'python3']...` - - CM script: [get-python3](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/get-python3) - * get,generic-python-lib,_requests - - CM script: [get-generic-python-lib](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/get-generic-python-lib) - * get,loadgen - * CM names: `--adr.['loadgen', 'mlperf-inference-loadgen']...` - - CM script: [get-mlperf-inference-loadgen](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/get-mlperf-inference-loadgen) - * mlperf,inference,source - * CM names: `--adr.['inference-src']...` - - CM script: [get-mlperf-inference-src](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/get-mlperf-inference-src) - * get,dataset,open-images,original - - CM script: [get-dataset-openimages](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/get-dataset-openimages) - * get,raw,ml-model,retinanet - - CM script: [get-ml-model-retinanet](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/get-ml-model-retinanet) - 1. ***Run "preprocess" function from [customize.py](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/test-mlperf-inference-retinanet-win/customize.py)*** - 1. Read "prehook_deps" on other CM scripts from [meta](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/test-mlperf-inference-retinanet-win/_cm.json) - 1. ***Run native script if exists*** - * [run.bat](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/test-mlperf-inference-retinanet-win/run.bat) - 1. Read "posthook_deps" on other CM scripts from [meta](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/test-mlperf-inference-retinanet-win/_cm.json) - 1. ***Run "postrocess" function from [customize.py](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/test-mlperf-inference-retinanet-win/customize.py)*** - 1. Read "post_deps" on other CM scripts from [meta](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/test-mlperf-inference-retinanet-win/_cm.json) -
- -___ -### Script output -`cmr "test mlperf-inference-win retinanet windows" -j` -#### New environment keys (filter) - -#### New environment keys auto-detected from customize - -___ -### Maintainers - -* [Open MLCommons taskforce on automation and reproducibility](https://github.com/mlcommons/ck/blob/master/docs/taskforce.md) \ No newline at end of file diff --git a/cm-mlops/script/test-mlperf-inference-retinanet-win/_cm.json b/cm-mlops/script/test-mlperf-inference-retinanet-win/_cm.json deleted file mode 100644 index 8d3bb8861d..0000000000 --- a/cm-mlops/script/test-mlperf-inference-retinanet-win/_cm.json +++ /dev/null @@ -1,49 +0,0 @@ -{ - "alias": "test-mlperf-inference-retinanet-win", - "automation_alias": "script", - "automation_uid": "5b4e0237da074764", - "category": "CM interface prototyping", - "deps": [ - { - "tags": "get,sys-utils-cm" - }, - { - "names": [ - "python", - "python3" - ], - "tags": "get,python3" - }, - { - "tags": "get,generic-python-lib,_requests" - }, - { - "names": [ - "loadgen", "mlperf-inference-loadgen" - ], - "tags": "get,loadgen" - }, - { - "force_env_keys": [ - "CM_GIT_*" - ], - "names": [ - "inference-src" - ], - "tags": "mlperf,inference,source" - }, - { - "tags": "get,dataset,open-images,original" - }, - { - "tags": "get,raw,ml-model,retinanet" - } - ], - "tags": [ - "test", - "mlperf-inference-win", - "retinanet", - "windows" - ], - "uid": "1cedbc3b642a403a" -} diff --git a/cm-mlops/script/test-mlperf-inference-retinanet-win/customize.py b/cm-mlops/script/test-mlperf-inference-retinanet-win/customize.py deleted file mode 100644 index 14e20d1bf2..0000000000 --- a/cm-mlops/script/test-mlperf-inference-retinanet-win/customize.py +++ /dev/null @@ -1,18 +0,0 @@ -from cmind import utils -import os - -def preprocess(i): - - os_info = i['os_info'] - env = i['env'] - meta = i['meta'] - - return {'return':0} - - -def postprocess(i): - - env = i['env'] - state = i['state'] - - return {'return':0} diff --git a/cm-mlops/script/test-mlperf-inference-retinanet-win/run.bat b/cm-mlops/script/test-mlperf-inference-retinanet-win/run.bat deleted file mode 100644 index 38970bc0ef..0000000000 --- a/cm-mlops/script/test-mlperf-inference-retinanet-win/run.bat +++ /dev/null @@ -1,8 +0,0 @@ -echo. 
- -set CUR_DIR=%cd% -set SCRIPT_DIR=%CM_TMP_CURRENT_SCRIPT_PATH% - -cd %CM_MLPERF_INFERENCE_CLASSIFICATION_AND_DETECTION_PATH% - -%CM_PYTHON_BIN_WITH_PATH% python/main.py --profile retinanet-onnxruntime --scenario Offline --model %CM_ML_MODEL_FILE_WITH_PATH% --dataset-path %CM_DATASET_PATH_ROOT%\validation\data --accuracy diff --git a/cm-mlops/script/test-mlperf-inference-retinanet-win/run.sh b/cm-mlops/script/test-mlperf-inference-retinanet-win/run.sh deleted file mode 100644 index 0bc3d3a8d3..0000000000 --- a/cm-mlops/script/test-mlperf-inference-retinanet-win/run.sh +++ /dev/null @@ -1,7 +0,0 @@ -#!/bin/bash - -echo "" - -cd ${CM_MLPERF_INFERENCE_CLASSIFICATION_AND_DETECTION_PATH} - -${CM_PYTHON_BIN_WITH_PATH} python/main.py --profile retinanet-onnxruntime --scenario Offline --model ${CM_ML_MODEL_FILE_WITH_PATH} --dataset-path ${CM_DATASET_PATH_ROOT} --accuracy From c7d8dc5a3b643e8114fea0237dde0ea9efc356fc Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Mon, 12 Feb 2024 14:29:15 +0000 Subject: [PATCH 14/33] Support version info dump --- cm-mlops/automation/script/module.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/cm-mlops/automation/script/module.py b/cm-mlops/automation/script/module.py index e7982a08a5..84159bf018 100644 --- a/cm-mlops/automation/script/module.py +++ b/cm-mlops/automation/script/module.py @@ -1617,6 +1617,11 @@ def run(self, i): with open('readme.md', 'w') as f: f.write(readme) + if i.get('dump_version_info'): + import json + with open('version_info.json', 'w') as f: + f.write(json.dumps(run_state['version_info'], indent=2)) + rr = {'return':0, 'env':env, 'new_env':new_env, 'state':state, 'new_state':new_state, 'deps': run_state['deps']} if i.get('json', False) or i.get('j', False): From 9e2e0bc9d020cd47fdf64cd175739d8d0313c27b Mon Sep 17 00:00:00 2001 From: Grigori Fursin Date: Mon, 12 Feb 2024 17:11:13 +0100 Subject: [PATCH 15/33] added host info in MLPerf result readmes: https://github.com/mlcommons/ck/issues/1071 --- .../customize.py | 4 +++- .../script/app-mlperf-inference/customize.py | 19 ++++++++++++++++--- .../customize.py | 3 ++- .../hardware/default.json | 2 +- 4 files changed, 22 insertions(+), 6 deletions(-) diff --git a/cm-mlops/script/app-mlperf-inference-reference/customize.py b/cm-mlops/script/app-mlperf-inference-reference/customize.py index c0563ec0d6..518b6a5a90 100644 --- a/cm-mlops/script/app-mlperf-inference-reference/customize.py +++ b/cm-mlops/script/app-mlperf-inference-reference/customize.py @@ -195,7 +195,9 @@ def get_run_cmd_reference(os_info, env, scenario_extra_options, mode_extra_optio if os_info['platform'] == 'windows': cmd = "python python/main.py --profile "+env['CM_MODEL']+"-"+env['CM_MLPERF_BACKEND'] + \ " --model=" + env['CM_ML_MODEL_FILE_WITH_PATH'] + ' --dataset-path=' + env['CM_DATASET_PREPROCESSED_PATH'] + \ - " --scenario " + env['CM_MLPERF_LOADGEN_SCENARIO'] + " " + env['CM_MLPERF_LOADGEN_EXTRA_OPTIONS'] + \ + " --scenario " + env['CM_MLPERF_LOADGEN_SCENARIO'] + " " + \ + " --output " + env['OUTPUT_DIR'] + " " + \ + env['CM_MLPERF_LOADGEN_EXTRA_OPTIONS'] + \ scenario_extra_options + mode_extra_options + dataset_options else: cmd = "./run_local.sh " + env['CM_MLPERF_BACKEND'] + ' ' + \ diff --git a/cm-mlops/script/app-mlperf-inference/customize.py b/cm-mlops/script/app-mlperf-inference/customize.py index 21b95b67d2..06dc639948 100644 --- a/cm-mlops/script/app-mlperf-inference/customize.py +++ b/cm-mlops/script/app-mlperf-inference/customize.py @@ -1,10 +1,13 @@ from cmind import utils + import os import json 
import shutil import subprocess import copy import cmind as cm +import platform +import sys def preprocess(i): @@ -27,6 +30,9 @@ def preprocess(i): def postprocess(i): + os_info = i['os_info'] + + xsep = '^' if os_info['platform'] == 'windows' else '\\' env = i['env'] inp = i['input'] @@ -193,14 +199,21 @@ def postprocess(i): if "cmd" in inp: cmd = "cm run script \\\n\t"+" \\\n\t".join(inp['cmd']) + xcmd = "cm run script "+xsep+"\n\t" + (" "+xsep+"\n\t").join(inp['cmd']) else: cmd = "" + xcmd = "" + + readme_init = "This experiment is generated using the [MLCommons Collective Mind automation framework (CM)](https://github.com/mlcommons/ck).\n\n" + + readme_body = "## Host platform\n\n* OS version: {}\n* CPU version: {}\n* Python version: {}\n* MLCommons CM version: {}\n\n".format(platform.platform(), + platform.processor(), sys.version, cm.__version__) - readme_init = "This experiment is generated using [MLCommons CM](https://github.com/mlcommons/ck)\n" - readme_body = "## CM Run Command\n```\n" + cmd + "\n```" + readme_body += "## CM Run Command\n\nSee [CM installation guide](https://github.com/mlcommons/ck/blob/master/docs/installation.md).\n\n"+ \ + "```bash\npip install cmind\n\ncm pull repo mlcommons@ck\n\n{}\n```".format(xcmd) if env.get('CM_MLPERF_README', '') == "yes": - readme_body += "\n## Dependent CM scripts \n" + readme_body += "\n## Dependent CM scripts\n\n" script_tags = inp['tags'] script_adr = inp.get('adr', {}) diff --git a/cm-mlops/script/generate-mlperf-inference-submission/customize.py b/cm-mlops/script/generate-mlperf-inference-submission/customize.py index 0db498bb13..64a3b0abfa 100644 --- a/cm-mlops/script/generate-mlperf-inference-submission/customize.py +++ b/cm-mlops/script/generate-mlperf-inference-submission/customize.py @@ -2,6 +2,7 @@ import os import json import shutil +import cmind def preprocess(i): return {'return': 0} @@ -79,7 +80,7 @@ def generate_submission(i): print('* MLPerf inference submitter: {}'.format(submitter)) if 'Collective' not in system_meta.get('sw_notes'): - system_meta['sw_notes'] = "Powered by MLCommons Collective Mind framework (CK2). " + system_meta['sw_notes'] + system_meta['sw_notes'] = "Automated by MLCommons CM v{}. 
".format(cmind.__version__) + system_meta['sw_notes'] if env.get('CM_MLPERF_SUT_SW_NOTES_EXTRA','') != '': sw_notes = f"{system_meta['sw_notes']} {env['CM_MLPERF_SUT_SW_NOTES_EXTRA']}" diff --git a/cm-mlops/script/get-mlperf-inference-sut-description/hardware/default.json b/cm-mlops/script/get-mlperf-inference-sut-description/hardware/default.json index 7cc293c1d0..b7cf960dbb 100644 --- a/cm-mlops/script/get-mlperf-inference-sut-description/hardware/default.json +++ b/cm-mlops/script/get-mlperf-inference-sut-description/hardware/default.json @@ -20,7 +20,7 @@ "number_of_nodes": "1", "status": "available", "submitter": "cTuning", - "sw_notes": "Powered by MLCommons CM (CK2)", + "sw_notes": "Automated by MLCommons CM", "system_type": "edge", "system_type_detail": "edge server" } From f05451441aa7a765cc199a7e42c70ebc17b6c7da Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Mon, 12 Feb 2024 16:15:52 +0000 Subject: [PATCH 16/33] Support version dump --- cm-mlops/automation/script/module.py | 46 ++++++++++++------- .../customize.py | 12 ++++- .../script/app-mlperf-inference/customize.py | 4 ++ .../script/run-mlperf-inference-app/_cm.yaml | 1 + .../run-mlperf-inference-app/customize.py | 5 +- 5 files changed, 48 insertions(+), 20 deletions(-) diff --git a/cm-mlops/automation/script/module.py b/cm-mlops/automation/script/module.py index 84159bf018..69ee3ad9aa 100644 --- a/cm-mlops/automation/script/module.py +++ b/cm-mlops/automation/script/module.py @@ -30,7 +30,7 @@ def __init__(self, cmind, automation_file): self.run_state['deps'] = [] self.run_state['fake_deps'] = False self.run_state['parent'] = None - self.run_state['version_info'] = {} + self.run_state['version_info'] = [] self.file_with_cached_state = 'cm-cached-state.json' @@ -289,8 +289,8 @@ def run(self, i): if fake_deps: env['CM_TMP_FAKE_DEPS']='yes' run_state = i.get('run_state', self.run_state) - if run_state.get('version_info', '') == '': - run_state['version_info'] = {} + if not run_state.get('version_info', []): + run_state['version_info'] = [] if run_state.get('parent', '') == '': run_state['parent'] = None if fake_deps: @@ -643,7 +643,7 @@ def run(self, i): if i.get('help',False): return utils.call_internal_module(self, __file__, 'module_help', 'print_help', {'meta':meta, 'path':path}) - + run_state['script_uid'] = meta['uid'] deps = meta.get('deps',[]) post_deps = meta.get('post_deps',[]) prehook_deps = meta.get('prehook_deps',[]) @@ -1582,22 +1582,27 @@ def run(self, i): if not version and detected_version: version = detected_version + if version: script_uid = script_artifact.meta.get('uid') script_alias = script_artifact.meta.get('alias') script_tags = script_artifact.meta.get('tags') - tags = i.get('tags') - run_state['version_info'][script_uid] = {} - run_state['version_info'][script_uid]['alias'] = script_alias - run_state['version_info'][script_uid]['script_tags'] = script_tags - run_state['version_info'][script_uid]['variation_tags'] = variation_tags - run_state['version_info'][script_uid]['version'] = version - + version_info = {} + version_info_tags = ",".join(script_tags + variation_tags) + version_info[version_info_tags] = {} + version_info[version_info_tags]['script_uid'] = script_uid + version_info[version_info_tags]['script_alias'] = script_alias + version_info[version_info_tags]['version'] = version + version_info[version_info_tags]['parent'] = run_state['parent'] + + run_state['version_info'].append(version_info) script_versions = detected_versions.get(meta['uid'], []) if not script_versions: 
detected_versions[meta['uid']] = [ version ] else: script_versions.append(version) + else: + pass # these scripts don't have versions. Should we use cm mlops version here? ############################# RETURN elapsed_time = time.time() - start_time @@ -1618,9 +1623,9 @@ def run(self, i): f.write(readme) if i.get('dump_version_info'): - import json - with open('version_info.json', 'w') as f: - f.write(json.dumps(run_state['version_info'], indent=2)) + r = self._dump_version_info_for_script() + if r['return'] > 0: + return r rr = {'return':0, 'env':env, 'new_env':new_env, 'state':state, 'new_state':new_state, 'deps': run_state['deps']} @@ -1636,6 +1641,12 @@ def run(self, i): return rr + def _dump_version_info_for_script(self, output_dir = os.getcwd()): + import json + with open(os.path.join(output_dir, 'version_info.json'), 'w') as f: + f.write(json.dumps(self.run_state['version_info'], indent=2)) + return {'return': 0} + def _update_state_from_variations(self, i, meta, variation_tags, variations, env, state, deps, post_deps, prehook_deps, posthook_deps, new_env_keys_from_meta, new_state_keys_from_meta, add_deps_recursive, run_state, recursion_spaces, verbose): # Save current explicit variations @@ -2691,7 +2702,7 @@ def _run_deps(self, deps, clean_env_keys_deps, env, state, const, const_state, a tmp_run_state_deps = copy.deepcopy(run_state['deps']) run_state['deps'] = [] tmp_parent = run_state['parent'] - run_state['parent'] = self.meta['uid'] + run_state['parent'] = run_state['script_uid'] # Run collective script via CM API: # Not very efficient but allows logging - can be optimized later @@ -3979,6 +3990,8 @@ def prepare_and_run_script_with_postprocessing(i, postprocess="postprocess"): verbose = i.get('verbose', False) if not verbose: verbose = i.get('v', False) + show_time = i.get('time', False) + recursion = i.get('recursion', False) found_script_tags = i.get('found_script_tags', []) debug_script_tags = i.get('debug_script_tags', '') @@ -4148,10 +4161,9 @@ def prepare_and_run_script_with_postprocessing(i, postprocess="postprocess"): if customize_code is not None: print (recursion_spaces+' ! 
call "{}" from {}'.format(postprocess, customize_code.__file__)) - if len(posthook_deps)>0 and (postprocess == "postprocess"): r = script_automation._call_run_deps(posthook_deps, local_env_keys, local_env_keys_from_meta, env, state, const, const_state, - add_deps_recursive, recursion_spaces, remembered_selections, variation_tags_string, found_cached, debug_script_tags, verbose, run_state) + add_deps_recursive, recursion_spaces, remembered_selections, variation_tags_string, found_cached, debug_script_tags, verbose, show_time, ' ', run_state) if r['return']>0: return r if (postprocess == "postprocess") and customize_code is not None and 'postprocess' in dir(customize_code): diff --git a/cm-mlops/script/app-mlperf-inference-reference/customize.py b/cm-mlops/script/app-mlperf-inference-reference/customize.py index 518b6a5a90..e253296a12 100644 --- a/cm-mlops/script/app-mlperf-inference-reference/customize.py +++ b/cm-mlops/script/app-mlperf-inference-reference/customize.py @@ -361,11 +361,11 @@ def get_run_cmd_reference(os_info, env, scenario_extra_options, mode_extra_optio def postprocess(i): env = i['env'] + state = i['state'] if env.get('CM_MLPERF_README', "") == "yes": import cmind as cm inp = i['input'] - state = i['state'] script_tags = inp['tags'] script_adr = inp.get('add_deps_recursive', inp.get('adr', {})) @@ -386,4 +386,14 @@ def postprocess(i): state['mlperf-inference-implementation'] = {} state['mlperf-inference-implementation']['print_deps'] = r['new_state']['print_deps'] + if env.get('CM_DUMP_VERSION_INFO', True): + if not state.get('mlperf-inference-implementation', {}): + state['mlperf-inference-implementation'] = {} + run_state = i['run_script_input']['run_state'] + state['mlperf-inference-implementation'][run_state['script_uid']] = {} + version_info = {} + version_info[run_state['script_uid']] = run_state['version_info'] + + state['mlperf-inference-implementation']['version_info'] = version_info + return {'return':0} diff --git a/cm-mlops/script/app-mlperf-inference/customize.py b/cm-mlops/script/app-mlperf-inference/customize.py index 06dc639948..dc8a21b411 100644 --- a/cm-mlops/script/app-mlperf-inference/customize.py +++ b/cm-mlops/script/app-mlperf-inference/customize.py @@ -320,4 +320,8 @@ def postprocess(i): if accuracy_result_dir != '': env['CM_MLPERF_ACCURACY_RESULTS_DIR'] = accuracy_result_dir + if state.get('mlperf-inference-implementation') and state['mlperf-inference-implementation'].get('version_info'): + with open(os.path.join(output_dir, "version_info.json"), "w") as f: + f.write(json.dumps(state['mlperf-inference-implementation']['version_info'], indent=2)) + return {'return':0} diff --git a/cm-mlops/script/run-mlperf-inference-app/_cm.yaml b/cm-mlops/script/run-mlperf-inference-app/_cm.yaml index 1d1e824849..95dcac2dc9 100644 --- a/cm-mlops/script/run-mlperf-inference-app/_cm.yaml +++ b/cm-mlops/script/run-mlperf-inference-app/_cm.yaml @@ -44,6 +44,7 @@ input_mapping: debug: CM_DEBUG_SCRIPT_BENCHMARK_PROGRAM device: CM_MLPERF_DEVICE division: CM_MLPERF_SUBMISSION_DIVISION + dump_version_info: CM_DUMP_VERSION_INFO execution_mode: CM_MLPERF_EXECUTION_MODE find_performance: CM_MLPERF_FIND_PERFORMANCE_MODE gpu_name: CM_NVIDIA_GPU_NAME diff --git a/cm-mlops/script/run-mlperf-inference-app/customize.py b/cm-mlops/script/run-mlperf-inference-app/customize.py index 56a6624c44..a7f6e54c17 100644 --- a/cm-mlops/script/run-mlperf-inference-app/customize.py +++ b/cm-mlops/script/run-mlperf-inference-app/customize.py @@ -19,6 +19,7 @@ def preprocess(i): if 
env.get('CM_RUN_DOCKER_CONTAINER', '') == "yes": return {'return':0} + dump_version_info = env.get('CM_DUMP_VERSION_INFO', True) system_meta = state['CM_SUT_META'] env['CM_SUT_META_EXISTS'] = "yes" @@ -161,7 +162,7 @@ def preprocess(i): print(f"\nRunning loadgen scenario: {scenario} and mode: {mode}") ii = {'action':'run', 'automation':'script', 'tags': scenario_tags, 'quiet': 'true', 'env': env, 'input': inp, 'state': state, 'add_deps': add_deps, 'add_deps_recursive': - copy.deepcopy(add_deps_recursive), 'ad': ad, 'adr': copy.deepcopy(adr), 'v': verbose, 'print_env': print_env, 'print_deps': print_deps} + copy.deepcopy(add_deps_recursive), 'ad': ad, 'adr': copy.deepcopy(adr), 'v': verbose, 'print_env': print_env, 'print_deps': print_deps, 'dump_version_info': dump_version_info} r = cm.access(ii) if r['return'] > 0: return r @@ -186,7 +187,7 @@ def preprocess(i): env['CM_MLPERF_LOADGEN_MODE'] = "compliance" r = cm.access({'action':'run', 'automation':'script', 'tags': scenario_tags, 'quiet': 'true', 'env': env, 'input': inp, 'state': state, 'add_deps': add_deps, 'add_deps_recursive': - copy.deepcopy(add_deps_recursive), 'adr': copy.deepcopy(adr), 'ad': ad, 'v': verbose, 'print_env': print_env, 'print_deps': print_deps}) + copy.deepcopy(add_deps_recursive), 'adr': copy.deepcopy(adr), 'ad': ad, 'v': verbose, 'print_env': print_env, 'print_deps': print_deps, 'dump_version_info': dump_version_info}) if r['return'] > 0: return r From 50208da89ace8227154699aaeb6cf98a82088e40 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Mon, 12 Feb 2024 16:23:45 +0000 Subject: [PATCH 17/33] Support version_dump for all mlperf inference implementations --- .../script/app-mlperf-inference-cpp/customize.py | 13 ++++++++++++- .../app-mlperf-inference-tflite-cpp/customize.py | 13 ++++++++++++- .../reproduce-mlperf-inference-intel/customize.py | 13 ++++++++++++- .../reproduce-mlperf-inference-nvidia/customize.py | 12 +++++++++++- 4 files changed, 47 insertions(+), 4 deletions(-) diff --git a/cm-mlops/script/app-mlperf-inference-cpp/customize.py b/cm-mlops/script/app-mlperf-inference-cpp/customize.py index aeca71a9b1..ddbb6a2fab 100644 --- a/cm-mlops/script/app-mlperf-inference-cpp/customize.py +++ b/cm-mlops/script/app-mlperf-inference-cpp/customize.py @@ -82,10 +82,11 @@ def preprocess(i): def postprocess(i): env = i['env'] + state = i['state'] + if env.get('CM_MLPERF_README', '') == "yes": import cmind as cm inp = i['input'] - state = i['state'] script_tags = inp['tags'] script_adr = inp.get('add_deps_recursive', inp.get('adr', {})) @@ -106,4 +107,14 @@ def postprocess(i): state['mlperf-inference-implementation'] = {} state['mlperf-inference-implementation']['print_deps'] = r['new_state']['print_deps'] + if env.get('CM_DUMP_VERSION_INFO', True): + if not state.get('mlperf-inference-implementation', {}): + state['mlperf-inference-implementation'] = {} + run_state = i['run_script_input']['run_state'] + state['mlperf-inference-implementation'][run_state['script_uid']] = {} + version_info = {} + version_info[run_state['script_uid']] = run_state['version_info'] + + state['mlperf-inference-implementation']['version_info'] = version_info + return {'return':0} diff --git a/cm-mlops/script/app-mlperf-inference-tflite-cpp/customize.py b/cm-mlops/script/app-mlperf-inference-tflite-cpp/customize.py index 9ff3d920b0..dcfcd4c964 100644 --- a/cm-mlops/script/app-mlperf-inference-tflite-cpp/customize.py +++ b/cm-mlops/script/app-mlperf-inference-tflite-cpp/customize.py @@ -91,10 +91,11 @@ def preprocess(i): def postprocess(i): 
env = i['env'] + state = i['state'] + if env.get('CM_MLPERF_README', '') == "yes": import cmind as cm inp = i['input'] - state = i['state'] script_tags = inp['tags'] script_adr = inp.get('add_deps_recursive', inp.get('adr', {})) @@ -115,4 +116,14 @@ def postprocess(i): state['mlperf-inference-implementation'] = {} state['mlperf-inference-implementation']['print_deps'] = r['new_state']['print_deps'] + if env.get('CM_DUMP_VERSION_INFO', True): + if not state.get('mlperf-inference-implementation', {}): + state['mlperf-inference-implementation'] = {} + run_state = i['run_script_input']['run_state'] + state['mlperf-inference-implementation'][run_state['script_uid']] = {} + version_info = {} + version_info[run_state['script_uid']] = run_state['version_info'] + + state['mlperf-inference-implementation']['version_info'] = version_info + return {'return':0} diff --git a/cm-mlops/script/reproduce-mlperf-inference-intel/customize.py b/cm-mlops/script/reproduce-mlperf-inference-intel/customize.py index 05279e631e..8d5c16ca74 100644 --- a/cm-mlops/script/reproduce-mlperf-inference-intel/customize.py +++ b/cm-mlops/script/reproduce-mlperf-inference-intel/customize.py @@ -92,10 +92,11 @@ def preprocess(i): def postprocess(i): env = i['env'] + state = i['state'] + if env.get('CM_MLPERF_README', '') == "yes": import cmind as cm inp = i['input'] - state = i['state'] script_tags = inp['tags'] script_adr = inp.get('add_deps_recursive', inp.get('adr', {})) @@ -116,4 +117,14 @@ def postprocess(i): state['mlperf-inference-implementation'] = {} state['mlperf-inference-implementation']['print_deps'] = r['new_state']['print_deps'] + if env.get('CM_DUMP_VERSION_INFO', True): + if not state.get('mlperf-inference-implementation', {}): + state['mlperf-inference-implementation'] = {} + run_state = i['run_script_input']['run_state'] + state['mlperf-inference-implementation'][run_state['script_uid']] = {} + version_info = {} + version_info[run_state['script_uid']] = run_state['version_info'] + + state['mlperf-inference-implementation']['version_info'] = version_info + return {'return':0} diff --git a/cm-mlops/script/reproduce-mlperf-inference-nvidia/customize.py b/cm-mlops/script/reproduce-mlperf-inference-nvidia/customize.py index df0288e8d9..633dfc27e9 100644 --- a/cm-mlops/script/reproduce-mlperf-inference-nvidia/customize.py +++ b/cm-mlops/script/reproduce-mlperf-inference-nvidia/customize.py @@ -416,11 +416,11 @@ def preprocess(i): def postprocess(i): env = i['env'] + state = i['state'] if env.get('CM_MLPERF_README', '') == "yes": import cmind as cm inp = i['input'] - state = i['state'] script_tags = inp['tags'] script_adr = inp.get('add_deps_recursive', inp.get('adr', {})) @@ -441,4 +441,14 @@ def postprocess(i): state['mlperf-inference-implementation'] = {} state['mlperf-inference-implementation']['print_deps'] = r['new_state']['print_deps'] + if env.get('CM_DUMP_VERSION_INFO', True): + if not state.get('mlperf-inference-implementation', {}): + state['mlperf-inference-implementation'] = {} + run_state = i['run_script_input']['run_state'] + state['mlperf-inference-implementation'][run_state['script_uid']] = {} + version_info = {} + version_info[run_state['script_uid']] = run_state['version_info'] + + state['mlperf-inference-implementation']['version_info'] = version_info + return {'return':0} From 166b3c2f7a718a0818f0a4559372c61edcee218f Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Mon, 12 Feb 2024 17:02:12 +0000 Subject: [PATCH 18/33] Added dump pip version script --- cm-mlops/script/dump-pip-freeze/_cm.yaml | 
16 ++++++++++ cm-mlops/script/dump-pip-freeze/customize.py | 32 ++++++++++++++++++++ cm-mlops/script/dump-pip-freeze/run.sh | 28 +++++++++++++++++ 3 files changed, 76 insertions(+) create mode 100644 cm-mlops/script/dump-pip-freeze/_cm.yaml create mode 100644 cm-mlops/script/dump-pip-freeze/customize.py create mode 100644 cm-mlops/script/dump-pip-freeze/run.sh diff --git a/cm-mlops/script/dump-pip-freeze/_cm.yaml b/cm-mlops/script/dump-pip-freeze/_cm.yaml new file mode 100644 index 0000000000..39acd5eee3 --- /dev/null +++ b/cm-mlops/script/dump-pip-freeze/_cm.yaml @@ -0,0 +1,16 @@ +alias: dump-pip-freeze +automation_alias: script +automation_uid: 5b4e0237da074764 +cache: false +tags: +- dump +- pip +- freeze +new_state_keys: + - pip_freeze +deps: + - tags: get,python + names: + - python + - python3 +uid: 33eb0a8006664cae diff --git a/cm-mlops/script/dump-pip-freeze/customize.py b/cm-mlops/script/dump-pip-freeze/customize.py new file mode 100644 index 0000000000..f8cce2f0a8 --- /dev/null +++ b/cm-mlops/script/dump-pip-freeze/customize.py @@ -0,0 +1,32 @@ +from cmind import utils +import os + +def preprocess(i): + + os_info = i['os_info'] + + env = i['env'] + + meta = i['meta'] + + automation = i['automation'] + + quiet = (env.get('CM_QUIET', False) == 'yes') + + return {'return':0} + +def postprocess(i): + + env = i['env'] + state = i['state'] + + pip_freeze = {} + with open("tmp-pip-freeze", "r") as f: + for line in f.readlines(): + if "==" in line: + split = line.split("==") + pip_freeze[split[0]] = split[1].strip() + + state['pip_freeze'] = pip_freeze + + return {'return':0} diff --git a/cm-mlops/script/dump-pip-freeze/run.sh b/cm-mlops/script/dump-pip-freeze/run.sh new file mode 100644 index 0000000000..a1cdb52eb4 --- /dev/null +++ b/cm-mlops/script/dump-pip-freeze/run.sh @@ -0,0 +1,28 @@ +#!/bin/bash + +#CM Script location: ${CM_TMP_CURRENT_SCRIPT_PATH} + +#To export any variable +#echo "VARIABLE_NAME=VARIABLE_VALUE" >>tmp-run-env.out + +#${CM_PYTHON_BIN_WITH_PATH} contains the path to python binary if "get,python" is added as a dependency + + + +function exit_if_error() { + test $? -eq 0 || exit $? +} + +function run() { + echo "Running: " + echo "$1" + echo "" + if [[ ${CM_FAKE_RUN} != 'yes' ]]; then + eval "$1" + exit_if_error + fi +} + +#Add your run commands here... 
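# Note (assumption): dump.py is not included in this patch; it is expected to
# write the output of `pip freeze` into ./tmp-pip-freeze, which the
# customize.py above then parses into state['pip_freeze'] - roughly
# equivalent to:
#   ${CM_PYTHON_BIN_WITH_PATH} -m pip freeze > tmp-pip-freeze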
+# run "$CM_RUN_CMD" +run "${CM_PYTHON_BIN_WITH_PATH} ${CM_TMP_CURRENT_SCRIPT_PATH}/dump.py" From 118894f0a7ca698b614577dbaa96938b9cfaf756 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Mon, 12 Feb 2024 17:03:57 +0000 Subject: [PATCH 19/33] Dump os,cpu and pip info for mlperf-inference --- .../script/app-mlperf-inference/customize.py | 24 +++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/cm-mlops/script/app-mlperf-inference/customize.py b/cm-mlops/script/app-mlperf-inference/customize.py index dc8a21b411..a15bf9c0dc 100644 --- a/cm-mlops/script/app-mlperf-inference/customize.py +++ b/cm-mlops/script/app-mlperf-inference/customize.py @@ -324,4 +324,28 @@ def postprocess(i): with open(os.path.join(output_dir, "version_info.json"), "w") as f: f.write(json.dumps(state['mlperf-inference-implementation']['version_info'], indent=2)) + if env.get('CM_DUMP_SYSTEM_INFO', True): + dump_script_output("detect,os", env, state, 'new_env', os.path.join(output_dir, "os_info.json")) + dump_script_output("detect,cpu", env, state, 'new_env', os.path.join(output_dir, "cpu_info.json")) + dump_script_output("dump,pip,freeze", env, state, 'new_state', os.path.join(output_dir, "pip_freeze.json")) + return {'return':0} + +def dump_script_output(script_tags, env, state, output_key, dump_file): + + cm_input = {'action': 'run', + 'automation': 'script', + 'tags': script_tags, + 'env': env, + 'state': state, + 'quiet': True, + 'silent': True, + } + r = cm.access(cm_input) + if r['return'] > 0: + return r + with open(dump_file, "w") as f: + f.write(json.dumps(r[output_key], indent=2)) + + return {'return': 0} + From dafca91b6eba33456d8cbf7971bdee6de8e62701 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Mon, 12 Feb 2024 17:08:29 +0000 Subject: [PATCH 20/33] Copies version,os,cpu and pip info files to mlperf inference submission --- .../generate-mlperf-inference-submission/customize.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/cm-mlops/script/generate-mlperf-inference-submission/customize.py b/cm-mlops/script/generate-mlperf-inference-submission/customize.py index 64a3b0abfa..5475c53881 100644 --- a/cm-mlops/script/generate-mlperf-inference-submission/customize.py +++ b/cm-mlops/script/generate-mlperf-inference-submission/customize.py @@ -272,6 +272,16 @@ def generate_submission(i): if model == "stable-diffusion-xl" and os.path.exists(os.path.join(result_mode_path, "images")): shutil.copytree(os.path.join(result_mode_path, "images"), os.path.join(submission_results_path, "images")) + elif mode == "performance": + if os.path.exists(os.path.join(result_mode_path, "version_info.json")): + files.append("version_info.json") + if os.path.exists(os.path.join(result_mode_path, "os_info.json")): + files.append("os_info.json") + if os.path.exists(os.path.join(result_mode_path, "cpu_info.json")): + files.append("cpu_info.json") + if os.path.exists(os.path.join(result_mode_path, "pip_freeze.json")): + files.append("pip_freeze.json") + for f in files: print(' * ' + f) p_target = os.path.join(submission_results_path, f) From 69dc5822e40682d769dcc74ebede9911b54ff6c2 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Mon, 12 Feb 2024 17:18:05 +0000 Subject: [PATCH 21/33] Fix the mlperf submission generation for log files --- .../customize.py | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/cm-mlops/script/generate-mlperf-inference-submission/customize.py b/cm-mlops/script/generate-mlperf-inference-submission/customize.py index 5475c53881..1ee5d8d968 100644 --- 
a/cm-mlops/script/generate-mlperf-inference-submission/customize.py +++ b/cm-mlops/script/generate-mlperf-inference-submission/customize.py @@ -265,6 +265,9 @@ def generate_submission(i): elif f == "README.md": readme = True shutil.copy(os.path.join(result_mode_path, f), os.path.join(submission_measurement_path, f)) + elif f in [ "version_info.json", "os_info.json", "cpu_info.json", "pip_freeze.json" ] and mode == "performance": + shutil.copy(os.path.join(result_mode_path, f), os.path.join(submission_measurement_path, f)) + if mode == "accuracy": if os.path.exists(os.path.join(result_mode_path, "accuracy.txt")): @@ -272,16 +275,6 @@ def generate_submission(i): if model == "stable-diffusion-xl" and os.path.exists(os.path.join(result_mode_path, "images")): shutil.copytree(os.path.join(result_mode_path, "images"), os.path.join(submission_results_path, "images")) - elif mode == "performance": - if os.path.exists(os.path.join(result_mode_path, "version_info.json")): - files.append("version_info.json") - if os.path.exists(os.path.join(result_mode_path, "os_info.json")): - files.append("os_info.json") - if os.path.exists(os.path.join(result_mode_path, "cpu_info.json")): - files.append("cpu_info.json") - if os.path.exists(os.path.join(result_mode_path, "pip_freeze.json")): - files.append("pip_freeze.json") - for f in files: print(' * ' + f) p_target = os.path.join(submission_results_path, f) From c239e71490c22d0aea9cf3c4dec11c5902231677 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Mon, 12 Feb 2024 17:40:46 +0000 Subject: [PATCH 22/33] Save performance and accuracy console logs for mlperf inference runs --- cm-mlops/script/benchmark-program/customize.py | 5 +++++ .../script/generate-mlperf-inference-submission/customize.py | 2 ++ cm-mlops/script/generate-mlperf-inference-user-conf/_cm.yaml | 1 + .../script/generate-mlperf-inference-user-conf/customize.py | 1 + cm-mlops/script/run-mlperf-inference-app/_cm.yaml | 1 + 5 files changed, 10 insertions(+) diff --git a/cm-mlops/script/benchmark-program/customize.py b/cm-mlops/script/benchmark-program/customize.py index 2593ebbac4..b4bb9e3f40 100644 --- a/cm-mlops/script/benchmark-program/customize.py +++ b/cm-mlops/script/benchmark-program/customize.py @@ -34,12 +34,17 @@ def preprocess(i): if env.get('CM_RUN_DIR','') == '': env['CM_RUN_DIR'] = os.getcwd() + env['CM_RUN_CMD'] = CM_RUN_PREFIX + ' ' + os.path.join(env['CM_RUN_DIR'],env['CM_BIN_NAME']) + ' ' + env['CM_RUN_SUFFIX'] x = env.get('CM_RUN_PREFIX0','') if x!='': env['CM_RUN_CMD'] = x + ' ' + env.get('CM_RUN_CMD','') + if env.get('CM_HOST_OS_TYPE', '') != 'windows' and str(env.get('CM_SAVE_CONSOLE_LOG', True)).lower() not in [ "no", "false", "0"]: + logs_dir = env.get('CM_LOGS_DIR', env['CM_RUN_DIR']) + env['CM_RUN_CMD'] += " 2>&1 | tee " + os.path.join(logs_dir, "console.out") + # Print info print ('***************************************************************************') print ('CM script::benchmark-program/run.sh') diff --git a/cm-mlops/script/generate-mlperf-inference-submission/customize.py b/cm-mlops/script/generate-mlperf-inference-submission/customize.py index 1ee5d8d968..bfdaa11dca 100644 --- a/cm-mlops/script/generate-mlperf-inference-submission/customize.py +++ b/cm-mlops/script/generate-mlperf-inference-submission/customize.py @@ -267,6 +267,8 @@ def generate_submission(i): shutil.copy(os.path.join(result_mode_path, f), os.path.join(submission_measurement_path, f)) elif f in [ "version_info.json", "os_info.json", "cpu_info.json", "pip_freeze.json" ] and mode == "performance": 
shutil.copy(os.path.join(result_mode_path, f), os.path.join(submission_measurement_path, f)) + elif f in [ "console.out" ]: + shutil.copy(os.path.join(result_mode_path, f), os.path.join(submission_measurement_path, mode+"_"+f)) if mode == "accuracy": diff --git a/cm-mlops/script/generate-mlperf-inference-user-conf/_cm.yaml b/cm-mlops/script/generate-mlperf-inference-user-conf/_cm.yaml index dedc119fbe..abc43e48b6 100644 --- a/cm-mlops/script/generate-mlperf-inference-user-conf/_cm.yaml +++ b/cm-mlops/script/generate-mlperf-inference-user-conf/_cm.yaml @@ -50,6 +50,7 @@ input_mapping: # Env keys which are exposed to higher level scripts new_env_keys: - CM_MLPERF_* + - CM_LOGS_DIR - CM_HW_* - CM_SUT_* - CM_MAX_EXAMPLES diff --git a/cm-mlops/script/generate-mlperf-inference-user-conf/customize.py b/cm-mlops/script/generate-mlperf-inference-user-conf/customize.py index 6f1b8b5b39..7ad53e9cc3 100644 --- a/cm-mlops/script/generate-mlperf-inference-user-conf/customize.py +++ b/cm-mlops/script/generate-mlperf-inference-user-conf/customize.py @@ -216,6 +216,7 @@ def preprocess(i): env['CM_MLPERF_INFERENCE_AUDIT_PATH'] = audit_full_path env['CM_MLPERF_OUTPUT_DIR'] = OUTPUT_DIR + env['CM_LOGS_DIR'] = OUTPUT_DIR env['CM_MLPERF_LOADGEN_LOGS_DIR'] = OUTPUT_DIR run_exists = run_files_exist(log_mode, OUTPUT_DIR, required_files, env) diff --git a/cm-mlops/script/run-mlperf-inference-app/_cm.yaml b/cm-mlops/script/run-mlperf-inference-app/_cm.yaml index 95dcac2dc9..6876897659 100644 --- a/cm-mlops/script/run-mlperf-inference-app/_cm.yaml +++ b/cm-mlops/script/run-mlperf-inference-app/_cm.yaml @@ -45,6 +45,7 @@ input_mapping: device: CM_MLPERF_DEVICE division: CM_MLPERF_SUBMISSION_DIVISION dump_version_info: CM_DUMP_VERSION_INFO + save_console_log: CM_SAVE_CONSOLE_LOG execution_mode: CM_MLPERF_EXECUTION_MODE find_performance: CM_MLPERF_FIND_PERFORMANCE_MODE gpu_name: CM_NVIDIA_GPU_NAME From 48705e1fe3034d9f7898bf05b85779521223165b Mon Sep 17 00:00:00 2001 From: Grigori Fursin Date: Mon, 12 Feb 2024 19:07:21 +0100 Subject: [PATCH 23/33] added CM repo git hash in auto-generated MLPerf readme --- .../script/app-mlperf-inference/customize.py | 47 ++++++++++++++++++- cmr.yaml | 5 +- docs/mlperf/README.md | 2 +- 3 files changed, 51 insertions(+), 3 deletions(-) diff --git a/cm-mlops/script/app-mlperf-inference/customize.py b/cm-mlops/script/app-mlperf-inference/customize.py index 06dc639948..a4b81d77d1 100644 --- a/cm-mlops/script/app-mlperf-inference/customize.py +++ b/cm-mlops/script/app-mlperf-inference/customize.py @@ -197,6 +197,46 @@ def postprocess(i): shutil.copy(env['CM_MLPERF_USER_CONF'], 'user.conf') + # Record basic host info + host_info = { + "os_version":platform.platform(), + "cpu_version":platform.processor(), + "python_version":sys.version, + "cm_version":cm.__version__ + } + + x = '' + if env.get('CM_HOST_OS_FLAVOR','')!='': x+=env['CM_HOST_OS_FLAVOR'] + if env.get('CM_HOST_OS_VERSION','')!='': x+=' '+env['CM_HOST_OS_VERSION'] + if x!='': host_info['os_version_sys'] = x + + if env.get('CM_HOST_SYSTEM_NAME','')!='': host_info['system_name']=env['CM_HOST_SYSTEM_NAME'] + + with open ("cm-host-info.json", "w") as fp: + fp.write(json.dumps(host_info, indent=2)+'\n') + + # Check CM automation repository + repo_name = 'mlcommons@ck' + repo_hash = '' + r = cm.access({'action':'find', 'automation':'repo', 'artifact':'mlcommons@ck,a4705959af8e447a'}) + if r['return']==0 and len(r['list'])==1: + repo_path = r['list'][0].path + if os.path.isdir(repo_path): + repo_name = os.path.basename(repo_path) + + # 
Check Grigori's dev + if repo_name == 'ck': repo_name = 'ctuning@mlcommons-ck' + + cur_dir_tmp=os.getcwd() + + os.chdir(repo_path) + + x = subprocess.run('git rev-parse HEAD', capture_output=True, text=True) + repo_hash = x.stdout.strip() + + os.chdir(cur_dir_tmp) + + # Prepare README if "cmd" in inp: cmd = "cm run script \\\n\t"+" \\\n\t".join(inp['cmd']) xcmd = "cm run script "+xsep+"\n\t" + (" "+xsep+"\n\t").join(inp['cmd']) @@ -206,11 +246,16 @@ def postprocess(i): readme_init = "This experiment is generated using the [MLCommons Collective Mind automation framework (CM)](https://github.com/mlcommons/ck).\n\n" + readme_init+= "*Check [CM MLPerf docs](https://github.com/mlcommons/ck/tree/master/docs/mlperf) for more details.*\n\n" + readme_body = "## Host platform\n\n* OS version: {}\n* CPU version: {}\n* Python version: {}\n* MLCommons CM version: {}\n\n".format(platform.platform(), platform.processor(), sys.version, cm.__version__) + x = repo_name + if repo_hash!='': x+=' --checkout='+str(repo_hash) + readme_body += "## CM Run Command\n\nSee [CM installation guide](https://github.com/mlcommons/ck/blob/master/docs/installation.md).\n\n"+ \ - "```bash\npip install cmind\n\ncm pull repo mlcommons@ck\n\n{}\n```".format(xcmd) + "```bash\npip install cmind\n\ncm pull repo {}\n\n{}\n```".format(x, xcmd) if env.get('CM_MLPERF_README', '') == "yes": readme_body += "\n## Dependent CM scripts\n\n" diff --git a/cmr.yaml b/cmr.yaml index 48ae611d66..994aaf98fa 100644 --- a/cmr.yaml +++ b/cmr.yaml @@ -1,5 +1,8 @@ alias: mlcommons@ck +uid: a4705959af8e447a + git: true + prefix: cm-mlops -uid: a4705959af8e447a + version: 1.6.2.1 diff --git a/docs/mlperf/README.md b/docs/mlperf/README.md index 3e6dbfe114..51d2859931 100644 --- a/docs/mlperf/README.md +++ b/docs/mlperf/README.md @@ -1,6 +1,6 @@ [ [Back to CM documentation](../README.md) ] -# How to run and customize MLPerf benchmarks? 
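The revision lookup above amounts to running `git rev-parse HEAD` inside the repository directory. A minimal standalone sketch of that idea (`git_rev_parse` is a hypothetical helper name); it uses a list argv and `cwd=` so the caller's working directory is never touched and no shell quoting is involved:

```python
# Minimal sketch: read the current git revision of a repository without
# changing the caller's working directory.
import subprocess

def git_rev_parse(repo_path, default=""):
    try:
        out = subprocess.run(["git", "rev-parse", "HEAD"],
                             cwd=repo_path, capture_output=True, text=True)
        return out.stdout.strip() if out.returncode == 0 else default
    except OSError:
        # git missing or repo_path not a directory
        return default

if __name__ == "__main__":
    print(git_rev_parse("."))
```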
+# Run and customize MLPerf benchmarks using the MLCommons CM automation framework This documentation explains how to run, customize and extend MLPerf benchmarks in a unified way across diverse models, data sets, software and hardware from different vendors From 5fc7a5cf6fbf6b03ef725746687f55312c075202 Mon Sep 17 00:00:00 2001 From: Grigori Fursin Date: Mon, 12 Feb 2024 19:11:46 +0100 Subject: [PATCH 24/33] add clean CM cache instruction --- cm-mlops/script/app-mlperf-inference/customize.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cm-mlops/script/app-mlperf-inference/customize.py b/cm-mlops/script/app-mlperf-inference/customize.py index 8ff2d2be62..087d8dace0 100644 --- a/cm-mlops/script/app-mlperf-inference/customize.py +++ b/cm-mlops/script/app-mlperf-inference/customize.py @@ -255,7 +255,7 @@ def postprocess(i): if repo_hash!='': x+=' --checkout='+str(repo_hash) readme_body += "## CM Run Command\n\nSee [CM installation guide](https://github.com/mlcommons/ck/blob/master/docs/installation.md).\n\n"+ \ - "```bash\npip install cmind\n\ncm pull repo {}\n\n{}\n```".format(x, xcmd) + "```bash\npip install cmind\n\ncm rm cache -f\n\ncm pull repo {}\n\n{}\n```".format(x, xcmd) if env.get('CM_MLPERF_README', '') == "yes": readme_body += "\n## Dependent CM scripts\n\n" From 4846e3bd6bc7f2f094d72a14af098b3372f874fd Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Mon, 12 Feb 2024 18:35:49 +0000 Subject: [PATCH 25/33] Add the missed dump.py script --- cm-mlops/script/dump-pip-freeze/dump.py | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 cm-mlops/script/dump-pip-freeze/dump.py diff --git a/cm-mlops/script/dump-pip-freeze/dump.py b/cm-mlops/script/dump-pip-freeze/dump.py new file mode 100644 index 0000000000..d5989a4a0a --- /dev/null +++ b/cm-mlops/script/dump-pip-freeze/dump.py @@ -0,0 +1,6 @@ +from pip._internal.operations import freeze + +pkgs = freeze.freeze() +with open('tmp-pip-freeze', "w") as f: + for pkg in pkgs: + f.write(pkg+"\n") From a7b4b32e45ed9b8bbb82167ce0f1d9360409ca05 Mon Sep 17 00:00:00 2001 From: Grigori Fursin Date: Mon, 12 Feb 2024 19:37:01 +0100 Subject: [PATCH 26/33] fixed git revision detection --- cm-mlops/automation/utils/module.py | 66 +++++++++++++++++++ .../script/app-mlperf-inference/customize.py | 14 ++-- cm-mlops/script/dump-pip-freeze/customize.py | 11 ++-- 3 files changed, 78 insertions(+), 13 deletions(-) diff --git a/cm-mlops/automation/utils/module.py b/cm-mlops/automation/utils/module.py index 2b479d5362..901fd35ddf 100644 --- a/cm-mlops/automation/utils/module.py +++ b/cm-mlops/automation/utils/module.py @@ -878,3 +878,69 @@ def uid(self, i): return r + + ############################################################################## + def system(self, i): + """ + Run system command and redirect output to strng. 
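For context on the helper being introduced here: `utils.system` captures output by appending shell redirections to temporary files, running the command, and reading the files back. A self-contained sketch of that technique (not the CM implementation; it assumes a shell that understands `>` and `2>`, and that `git` is installed for the demo call):

```python
# Self-contained sketch of the redirect-to-temp-file approach: run a shell
# command, capture stdout/stderr into temporary files, read them back, clean up.
import os
import tempfile

def system_capture(cmd, path=None):
    cwd = os.getcwd()
    if path:
        os.chdir(path)
    try:
        fd1, fn1 = tempfile.mkstemp()
        os.close(fd1)
        fd2, fn2 = tempfile.mkstemp()
        os.close(fd2)
        ret = os.system(cmd + " > " + fn1 + " 2> " + fn2)
        with open(fn1) as f:
            stdout = f.read().strip()
        with open(fn2) as f:
            stderr = f.read().strip()
        os.remove(fn1)
        os.remove(fn2)
    finally:
        os.chdir(cwd)  # always restore the caller's working directory
    return {"ret": ret, "stdout": stdout, "stderr": stderr}

if __name__ == "__main__":
    print(system_capture("git --version"))
```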
+ + Args: + (CM input dict): + + * cmd (str): command line + * (path) (str): go to this directory and return back to current + + Returns: + (CM return dict): + + * return (int): return code == 0 if no error and >0 if error + * (error) (str): error string if return>0 + + * ret (int): return code + * std (str): stdout + stderr + * stdout (str): stdout + * stderr (str): stderr + """ + + cmd = i['cmd'] + + if cmd == '': + return {'return':1, 'error': 'cmd is empty'} + + path = i.get('path','') + if path!='' and os.path.isdir(path): + cur_dir = os.getcwd() + os.chdir(path) + + r = utils.gen_tmp_file({}) + if r['return'] > 0: return r + fn1 = r['file_name'] + + r = utils.gen_tmp_file({}) + if r['return'] > 0: return r + fn2 = r['file_name'] + + cmd += ' > '+fn1 + ' 2> '+fn2 + rx = os.system(cmd) + + std = '' + stdout = '' + stderr = '' + + if os.path.isfile(fn1): + r = utils.load_txt(file_name = fn1, remove_after_read = True) + if r['return'] == 0: stdout = r['string'].strip() + + if os.path.isfile(fn2): + r = utils.load_txt(file_name = fn2, remove_after_read = True) + if r['return'] == 0: stderr = r['string'].strip() + + std = stdout + if stderr!='': + if std!='': std+='\n' + std+=stderr + + if path!='' and os.path.isdir(path): + os.chdir(cur_dir) + + return {'return':0, 'ret':rx, 'stdout':stdout, 'stderr':stderr, 'std':std} diff --git a/cm-mlops/script/app-mlperf-inference/customize.py b/cm-mlops/script/app-mlperf-inference/customize.py index 087d8dace0..755a1c19f8 100644 --- a/cm-mlops/script/app-mlperf-inference/customize.py +++ b/cm-mlops/script/app-mlperf-inference/customize.py @@ -227,14 +227,12 @@ def postprocess(i): # Check Grigori's dev if repo_name == 'ck': repo_name = 'ctuning@mlcommons-ck' - cur_dir_tmp=os.getcwd() - - os.chdir(repo_path) - - x = subprocess.run('git rev-parse HEAD', capture_output=True, text=True) - repo_hash = x.stdout.strip() - - os.chdir(cur_dir_tmp) + r = cm.access({'action':'system', + 'automation':'utils', + 'path':repo_path, + 'cmd':'git rev-parse HEAD'}) + if r['return'] == 0 and r['ret'] == 0: + repo_hash = r['stdout'] # Prepare README if "cmd" in inp: diff --git a/cm-mlops/script/dump-pip-freeze/customize.py b/cm-mlops/script/dump-pip-freeze/customize.py index f8cce2f0a8..f16a99dc1c 100644 --- a/cm-mlops/script/dump-pip-freeze/customize.py +++ b/cm-mlops/script/dump-pip-freeze/customize.py @@ -21,11 +21,12 @@ def postprocess(i): state = i['state'] pip_freeze = {} - with open("tmp-pip-freeze", "r") as f: - for line in f.readlines(): - if "==" in line: - split = line.split("==") - pip_freeze[split[0]] = split[1].strip() + if os.path.isfile('tmp-pip-freeze'): + with open("tmp-pip-freeze", "r") as f: + for line in f.readlines(): + if "==" in line: + split = line.split("==") + pip_freeze[split[0]] = split[1].strip() state['pip_freeze'] = pip_freeze From 341d320da89276c3ffafa9135e85a5734bf60424 Mon Sep 17 00:00:00 2001 From: Grigori Fursin Date: Mon, 12 Feb 2024 20:01:56 +0100 Subject: [PATCH 27/33] fixed pip freeze on windows --- cm-mlops/automation/utils/module.py | 30 ++++++++++++++------ cm-mlops/script/dump-pip-freeze/customize.py | 15 ++++++++++ cm-mlops/script/dump-pip-freeze/dump.py | 17 +++++++++-- cm-mlops/script/dump-pip-freeze/run.bat | 4 +++ 4 files changed, 55 insertions(+), 11 deletions(-) create mode 100644 cm-mlops/script/dump-pip-freeze/run.bat diff --git a/cm-mlops/automation/utils/module.py b/cm-mlops/automation/utils/module.py index 901fd35ddf..9ead9f3629 100644 --- a/cm-mlops/automation/utils/module.py +++ 
b/cm-mlops/automation/utils/module.py @@ -889,6 +889,8 @@ def system(self, i): * cmd (str): command line * (path) (str): go to this directory and return back to current + * (stdout) (str): stdout file + * (stderr) (str): stderr file Returns: (CM return dict): @@ -912,13 +914,23 @@ def system(self, i): cur_dir = os.getcwd() os.chdir(path) - r = utils.gen_tmp_file({}) - if r['return'] > 0: return r - fn1 = r['file_name'] - - r = utils.gen_tmp_file({}) - if r['return'] > 0: return r - fn2 = r['file_name'] + if i.get('stdout','')!='': + fn1=i['stdout'] + fn1_delete = False + else: + r = utils.gen_tmp_file({}) + if r['return'] > 0: return r + fn1 = r['file_name'] + fn1_delete = True + + if i.get('stderr','')!='': + fn2=i['stderr'] + fn2_delete = False + else: + r = utils.gen_tmp_file({}) + if r['return'] > 0: return r + fn2 = r['file_name'] + fn2_delete = True cmd += ' > '+fn1 + ' 2> '+fn2 rx = os.system(cmd) @@ -928,11 +940,11 @@ def system(self, i): stderr = '' if os.path.isfile(fn1): - r = utils.load_txt(file_name = fn1, remove_after_read = True) + r = utils.load_txt(file_name = fn1, remove_after_read = fn1_delete) if r['return'] == 0: stdout = r['string'].strip() if os.path.isfile(fn2): - r = utils.load_txt(file_name = fn2, remove_after_read = True) + r = utils.load_txt(file_name = fn2, remove_after_read = fn2_delete) if r['return'] == 0: stderr = r['string'].strip() std = stdout diff --git a/cm-mlops/script/dump-pip-freeze/customize.py b/cm-mlops/script/dump-pip-freeze/customize.py index f16a99dc1c..aef0a981df 100644 --- a/cm-mlops/script/dump-pip-freeze/customize.py +++ b/cm-mlops/script/dump-pip-freeze/customize.py @@ -20,7 +20,21 @@ def postprocess(i): env = i['env'] state = i['state'] + os_info = i['os_info'] + + automation = i['automation'] + pip_freeze = {} + if not os.path.isfile('tmp-pip-freeze'): + # If was not created, sometimes issues on Windows + # There is another workaround + if os_info['platform'] == 'windows': + r = automation.cmind.access({'action':'system', + 'automation':'utils', + 'cmd':'py -m pip freeze', + 'stdout':'tmp-pip-freeze'}) + # skip output + if os.path.isfile('tmp-pip-freeze'): with open("tmp-pip-freeze", "r") as f: for line in f.readlines(): @@ -28,6 +42,7 @@ def postprocess(i): split = line.split("==") pip_freeze[split[0]] = split[1].strip() + state['pip_freeze'] = pip_freeze return {'return':0} diff --git a/cm-mlops/script/dump-pip-freeze/dump.py b/cm-mlops/script/dump-pip-freeze/dump.py index d5989a4a0a..d74507ccf6 100644 --- a/cm-mlops/script/dump-pip-freeze/dump.py +++ b/cm-mlops/script/dump-pip-freeze/dump.py @@ -1,6 +1,19 @@ +import os from pip._internal.operations import freeze +if os.path.isfile('tmp-pip-freeze'): + os.remove('tmp-pip-freeze') + pkgs = freeze.freeze() -with open('tmp-pip-freeze', "w") as f: + +x = '' + +try: for pkg in pkgs: - f.write(pkg+"\n") + x+=pkg+'\n' +except: + pass + +if len(x)>0: + with open('tmp-pip-freeze', "w") as f: + f.write(x) diff --git a/cm-mlops/script/dump-pip-freeze/run.bat b/cm-mlops/script/dump-pip-freeze/run.bat new file mode 100644 index 0000000000..b323ddc22e --- /dev/null +++ b/cm-mlops/script/dump-pip-freeze/run.bat @@ -0,0 +1,4 @@ +if not "%CM_FAKE_RUN%" == "yes" ( + %CM_PYTHON_BIN_WITH_PATH% %CM_TMP_CURRENT_SCRIPT_PATH%\dump.py + IF %ERRORLEVEL% NEQ 0 EXIT %ERRORLEVEL% +) From d1e78034baf40cb6cfdb3e8fbc7841631bd035e0 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Mon, 12 Feb 2024 20:35:27 +0000 Subject: [PATCH 28/33] Improve the measurement readme generation --- .../customize.py | 123 
++++++++++++++++-- 1 file changed, 115 insertions(+), 8 deletions(-) diff --git a/cm-mlops/script/generate-mlperf-inference-submission/customize.py b/cm-mlops/script/generate-mlperf-inference-submission/customize.py index bfdaa11dca..d9a3f7a7c3 100644 --- a/cm-mlops/script/generate-mlperf-inference-submission/customize.py +++ b/cm-mlops/script/generate-mlperf-inference-submission/customize.py @@ -172,6 +172,7 @@ def generate_submission(i): scenarios.append("multistream") modes = [f for f in os.listdir(result_scenario_path) if not os.path.isfile(os.path.join(result_scenario_path, f))] + power_run = False for mode in modes: result_mode_path = os.path.join(result_scenario_path, mode) submission_mode_path = os.path.join(submission_scenario_path, mode) @@ -188,7 +189,6 @@ def generate_submission(i): os.makedirs(submission_measurement_path) if mode=='performance': - power_run = False if os.path.exists(os.path.join(result_mode_path, "power")): power_run = True @@ -262,10 +262,7 @@ def generate_submission(i): files.append(f) elif f == "spl.txt": files.append(f) - elif f == "README.md": - readme = True - shutil.copy(os.path.join(result_mode_path, f), os.path.join(submission_measurement_path, f)) - elif f in [ "version_info.json", "os_info.json", "cpu_info.json", "pip_freeze.json" ] and mode == "performance": + elif f in [ "README.md", "version_info.json", "os_info.json", "cpu_info.json", "pip_freeze.json" ] and mode == "performance": shutil.copy(os.path.join(result_mode_path, f), os.path.join(submission_measurement_path, f)) elif f in [ "console.out" ]: shutil.copy(os.path.join(result_mode_path, f), os.path.join(submission_measurement_path, mode+"_"+f)) @@ -282,12 +279,122 @@ def generate_submission(i): p_target = os.path.join(submission_results_path, f) shutil.copy(os.path.join(result_mode_path, f), p_target) - if not readme and mode in [ "performance", "accuracy" ]: - with open(os.path.join(submission_measurement_path, "README.md"), mode='w') as f: - f.write("TBD") #create an empty README + readme_file = os.path.join(submission_measurement_path, "README.md") + if not os.path.exists(readme_file): + with open(readme_file, mode='w') as f: + f.write("TBD") #create an empty README + else: + readme_suffix = "" + result_string = get_result_string(env['CM_MLPERF_LAST_RELEASE'], model, scenario, result_scenario_path, power_run) + with open(readme_file, mode='a') as f: + f.write(result_string) + return {'return':0} + +def get_accuracy_metric(config, model, path): + + import submission_checker as checker + import re + is_valid = False + all_accuracy_valid = True + acc = None + result_acc = None + target = config.get_accuracy_target(model) + acc_upper_limit = config.get_accuracy_upper_limit(model) + patterns = [] + acc_targets = [] + acc_limits = [] + up_patterns = [] + acc_types = [] + + if acc_upper_limit is not None: + acc_limit_check = True + for i in range(0, len(acc_upper_limit), 2): + acc_type, acc_target = acc_upper_limit[i:i+2] + acc_limits.append(acc_target) + up_patterns.append(checker.ACC_PATTERN[acc_type]) + + for i in range(0, len(target), 2): + acc_type, acc_target = target[i:i+2] + acc_types.append(acc_type) + patterns.append(checker.ACC_PATTERN[acc_type]) + acc_targets.append(acc_target) + + acc_seen = [False for _ in acc_targets] + acc_results = {} + with open(os.path.join(path, "accuracy.txt"), "r", encoding="utf-8") as f: + for line in f: + for i, (pattern, acc_target, acc_type) in enumerate(zip(patterns, acc_targets, acc_types)): + m = re.match(pattern, line) + if m: + acc = m.group(1) + 
+ acc_results[acc_type] = acc + + if acc is not None and float(acc) >= acc_target: + all_accuracy_valid &= True + acc_seen[i] = True + elif acc is not None: + all_accuracy_valid = False + #log.warning("%s accuracy not met: expected=%f, found=%s", path, acc_target, acc) + if i == 0 and acc: + result_acc = acc + acc = None + if acc_upper_limit is not None: + for i, (pattern, acc_limit) in enumerate(zip(up_patterns, acc_limits)): + m = re.match(pattern, line) + if m: + acc = m.group(1) + if acc is not None and acc_upper_limit is not None and float(acc) > acc_limit: + acc_limit_check = False + #log.warning("%s accuracy not met: upper limit=%f, found=%s", path, acc_limit, acc) + acc = None + if all(acc_seen): + break; + is_valid = all_accuracy_valid & all(acc_seen) + if acc_upper_limit is not None: + is_valid &= acc_limit_check + + + return acc_results, acc_targets, acc_limits, up_patterns + + +def get_result_string(version, model, scenario, result_path, has_power): + import submission_checker as checker + config = checker.Config( + version, + None, + ignore_uncommited=False, + skip_power_check=False, + ) + mlperf_model = config.get_mlperf_model(model) + performance_path = os.path.join(result_path, "performance", "run_1") + accuracy_path = os.path.join(result_path, "accuracy") + performance_result = checker.get_performance_metric(config, mlperf_model, performance_path, scenario, None, None, has_power) + if has_power: + is_valid, power_metric, scenario, avg_power_efficiency = checker.get_power_metric(config, scenario, performance_path, True, performance_result) + power_result_string = power_metric + acc_results, acc_targets, acc_limits, up_patterns = get_accuracy_metric(config, mlperf_model, accuracy_path) + + result_field = checker.RESULT_FIELD[checker.SCENARIO_MAPPING[scenario]] + + performance_result_string = f"{result_field}: {performance_result}\n" + accuracy_result_string = '' + for i, acc in enumerate(acc_results): + accuracy_result_string += f"{acc}: {acc_results[acc]}" + if not up_patterns: + accuracy_result_string += f", Required accuracy for closed division >= {round(acc_targets[i], 5)}" + + result_string = "\n\n## Results \n" + result_string += "### Accuracy Results \n" + accuracy_result_string + result_string += "\n\n### Performance Results \n" + performance_result_string + if has_power: + result_string += "\n\n### Power Results \n" + power_result_string + + return result_string + def postprocess(i): r = generate_submission(i) From 1f539390ed75574073a210c07c571a6224c52748 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Tue, 13 Feb 2024 12:19:18 +0000 Subject: [PATCH 29/33] Fixed cm_version_info dump - added script variations, seperated mlperf inference readmes into 2 --- cm-mlops/automation/script/module.py | 12 ++++++++---- .../app-mlperf-inference-reference/customize.py | 3 +-- cm-mlops/script/app-mlperf-inference/customize.py | 15 ++++++++++----- .../customize.py | 2 +- 4 files changed, 20 insertions(+), 12 deletions(-) diff --git a/cm-mlops/automation/script/module.py b/cm-mlops/automation/script/module.py index 69ee3ad9aa..0f83a775c1 100644 --- a/cm-mlops/automation/script/module.py +++ b/cm-mlops/automation/script/module.py @@ -643,7 +643,9 @@ def run(self, i): if i.get('help',False): return utils.call_internal_module(self, __file__, 'module_help', 'print_help', {'meta':meta, 'path':path}) - run_state['script_uid'] = meta['uid'] + run_state['script_id'] = meta['alias'] + "," + meta['uid'] + run_state['script_variation_tags'] = variation_tags + deps = meta.get('deps',[]) 
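The `run_state` handling in this patch is a save-set-restore pattern around nested script runs: remember the caller's identity, point `parent` at it while dependencies execute, then put everything back. A stripped-down sketch of the same idea (`run_with_deps` and `run_dep` are hypothetical names; the `try/finally` is an added safety net, not part of the original):

```python
# Stripped-down sketch of the run_state save/restore pattern: nested scripts
# may overwrite these keys, so the caller's identity is saved and restored.
def run_with_deps(run_state, deps, run_dep):
    saved = {k: run_state[k]
             for k in ("parent", "script_id", "script_variation_tags")}
    run_state["parent"] = (run_state["script_id"] + ":" +
                           ",".join(run_state["script_variation_tags"]))
    try:
        for dep in deps:
            run_dep(dep, run_state)
    finally:
        run_state.update(saved)  # restore the caller's identity

if __name__ == "__main__":
    state = {"parent": "",
             "script_id": "app-mlperf-inference,uid123",
             "script_variation_tags": ["reference", "onnx"]}
    run_with_deps(state, ["get-python"],
                  lambda d, s: print("dep", d, "parent:", s["parent"]))
    print(repr(state["parent"]))  # restored to ''
```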
post_deps = meta.get('post_deps',[]) prehook_deps = meta.get('prehook_deps',[]) @@ -1594,7 +1596,6 @@ def run(self, i): version_info[version_info_tags]['script_alias'] = script_alias version_info[version_info_tags]['version'] = version version_info[version_info_tags]['parent'] = run_state['parent'] - run_state['version_info'].append(version_info) script_versions = detected_versions.get(meta['uid'], []) if not script_versions: @@ -2702,7 +2703,9 @@ def _run_deps(self, deps, clean_env_keys_deps, env, state, const, const_state, a tmp_run_state_deps = copy.deepcopy(run_state['deps']) run_state['deps'] = [] tmp_parent = run_state['parent'] - run_state['parent'] = run_state['script_uid'] + run_state['parent'] = run_state['script_id']+":"+",".join(run_state['script_variation_tags']) + tmp_script_id = run_state['script_id'] + tmp_script_variation_tags = run_state['script_variation_tags'] # Run collective script via CM API: # Not very efficient but allows logging - can be optimized later @@ -2738,12 +2741,13 @@ def _run_deps(self, deps, clean_env_keys_deps, env, state, const, const_state, a run_state['deps'] = tmp_run_state_deps run_state['parent'] = tmp_parent + run_state['script_id'] = tmp_script_id + run_state['script_variation_tags'] = tmp_script_variation_tags # Restore local env env.update(tmp_env) update_env_with_values(env) - return {'return': 0} ############################################################################## diff --git a/cm-mlops/script/app-mlperf-inference-reference/customize.py b/cm-mlops/script/app-mlperf-inference-reference/customize.py index e253296a12..c5bf54ede3 100644 --- a/cm-mlops/script/app-mlperf-inference-reference/customize.py +++ b/cm-mlops/script/app-mlperf-inference-reference/customize.py @@ -390,9 +390,8 @@ def postprocess(i): if not state.get('mlperf-inference-implementation', {}): state['mlperf-inference-implementation'] = {} run_state = i['run_script_input']['run_state'] - state['mlperf-inference-implementation'][run_state['script_uid']] = {} version_info = {} - version_info[run_state['script_uid']] = run_state['version_info'] + version_info[run_state['script_id']+":"+",".join(run_state['script_variation_tags'])] = run_state['version_info'] state['mlperf-inference-implementation']['version_info'] = version_info diff --git a/cm-mlops/script/app-mlperf-inference/customize.py b/cm-mlops/script/app-mlperf-inference/customize.py index 755a1c19f8..139fbe1e80 100644 --- a/cm-mlops/script/app-mlperf-inference/customize.py +++ b/cm-mlops/script/app-mlperf-inference/customize.py @@ -255,8 +255,10 @@ def postprocess(i): readme_body += "## CM Run Command\n\nSee [CM installation guide](https://github.com/mlcommons/ck/blob/master/docs/installation.md).\n\n"+ \ "```bash\npip install cmind\n\ncm rm cache -f\n\ncm pull repo {}\n\n{}\n```".format(x, xcmd) + extra_readme_init = '' + extra_readme_body = '' if env.get('CM_MLPERF_README', '') == "yes": - readme_body += "\n## Dependent CM scripts\n\n" + extra_readme_body += "\n## Dependent CM scripts\n\n" script_tags = inp['tags'] script_adr = inp.get('adr', {}) @@ -278,23 +280,26 @@ def postprocess(i): print_deps = r['new_state']['print_deps'] count = 1 for dep in print_deps: - readme_body += "\n\n" + str(count) +". `" +dep+ "`\n" + extra_readme_body += "\n\n" + str(count) +". 
`" +dep+ "`\n" count = count+1 if state.get('mlperf-inference-implementation') and state['mlperf-inference-implementation'].get('print_deps'): - readme_body += "\n## Dependent CM scripts for the MLPerf Inference Implementation\n" + extra_readme_body += "\n## Dependent CM scripts for the MLPerf Inference Implementation\n" print_deps = state['mlperf-inference-implementation']['print_deps'] count = 1 for dep in print_deps: - readme_body += "\n\n" + str(count) +". `" +dep+"`\n" + extra_readme_body += "\n\n" + str(count) +". `" +dep+"`\n" count = count+1 readme = readme_init + readme_body + extra_readme = extra_readme_init + extra_readme_body with open ("README.md", "w") as fp: fp.write(readme) + with open ("README-extra.md", "w") as fp: + fp.write(extra_readme) elif mode == "compliance": @@ -364,7 +369,7 @@ def postprocess(i): env['CM_MLPERF_ACCURACY_RESULTS_DIR'] = accuracy_result_dir if state.get('mlperf-inference-implementation') and state['mlperf-inference-implementation'].get('version_info'): - with open(os.path.join(output_dir, "version_info.json"), "w") as f: + with open(os.path.join(output_dir, "cm_version_info.json"), "w") as f: f.write(json.dumps(state['mlperf-inference-implementation']['version_info'], indent=2)) if env.get('CM_DUMP_SYSTEM_INFO', True): diff --git a/cm-mlops/script/generate-mlperf-inference-submission/customize.py b/cm-mlops/script/generate-mlperf-inference-submission/customize.py index d9a3f7a7c3..f84901cd2a 100644 --- a/cm-mlops/script/generate-mlperf-inference-submission/customize.py +++ b/cm-mlops/script/generate-mlperf-inference-submission/customize.py @@ -262,7 +262,7 @@ def generate_submission(i): files.append(f) elif f == "spl.txt": files.append(f) - elif f in [ "README.md", "version_info.json", "os_info.json", "cpu_info.json", "pip_freeze.json" ] and mode == "performance": + elif f in [ "README.md", "README-extra.md", "cm_version_info.json", "os_info.json", "cpu_info.json", "pip_freeze.json" ] and mode == "performance": shutil.copy(os.path.join(result_mode_path, f), os.path.join(submission_measurement_path, f)) elif f in [ "console.out" ]: shutil.copy(os.path.join(result_mode_path, f), os.path.join(submission_measurement_path, mode+"_"+f)) From bd5590b42706d89b120ab63b5afc414c1620d068 Mon Sep 17 00:00:00 2001 From: Grigori Fursin Date: Tue, 13 Feb 2024 14:26:17 +0100 Subject: [PATCH 30/33] added extra notes about submission and info about inference/power Git revision --- cm-mlops/automation/utils/module.py | 2 +- .../script/app-mlperf-inference/customize.py | 68 +++++++++++++++++-- .../customize.py | 2 +- 3 files changed, 64 insertions(+), 8 deletions(-) diff --git a/cm-mlops/automation/utils/module.py b/cm-mlops/automation/utils/module.py index 9ead9f3629..45d8802283 100644 --- a/cm-mlops/automation/utils/module.py +++ b/cm-mlops/automation/utils/module.py @@ -882,7 +882,7 @@ def uid(self, i): ############################################################################## def system(self, i): """ - Run system command and redirect output to strng. + Run system command and redirect output to string. 
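A small sketch of the version-info key convention adopted in the previous patch: each recorded entry is keyed by `<alias>,<uid>:<variation tags>`, so the same script can be recorded once per variation combination (names and values below are illustrative only):

```python
# Sketch of the "script_id:variation_tags" key convention used for
# cm-version-info.json entries; the run_state fields are illustrative.
import json

def version_info_entry(run_state):
    key = (run_state["script_id"] + ":" +
           ",".join(run_state["script_variation_tags"]))
    return {key: run_state["version_info"]}

if __name__ == "__main__":
    rs = {"script_id": "app-mlperf-inference-reference,uid456",
          "script_variation_tags": ["onnxruntime", "cpu"],
          "version_info": []}
    print(json.dumps(version_info_entry(rs), indent=2))
```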
Args: (CM input dict): diff --git a/cm-mlops/script/app-mlperf-inference/customize.py b/cm-mlops/script/app-mlperf-inference/customize.py index 139fbe1e80..8a4000899f 100644 --- a/cm-mlops/script/app-mlperf-inference/customize.py +++ b/cm-mlops/script/app-mlperf-inference/customize.py @@ -212,9 +212,6 @@ def postprocess(i): if env.get('CM_HOST_SYSTEM_NAME','')!='': host_info['system_name']=env['CM_HOST_SYSTEM_NAME'] - with open ("cm-host-info.json", "w") as fp: - fp.write(json.dumps(host_info, indent=2)+'\n') - # Check CM automation repository repo_name = 'mlcommons@ck' repo_hash = '' @@ -234,6 +231,61 @@ def postprocess(i): if r['return'] == 0 and r['ret'] == 0: repo_hash = r['stdout'] + host_info['cm_repo_name'] = repo_name + host_info['cm_repo_git_hash'] = repo_hash + + # Check a few important MLCommons repos + xhashes = [] + md_xhashes = '' + + for x in [('get,git,inference', ['inference']), + ('get,git,mlperf,power', ['power-dev'])]: + xtags = x[0] + xdirs = x[1] + + rx = cm.access({'action':'find', 'automation':'cache', 'tags':xtags}) + if rx['return']>0: return rx + for cache in rx['list']: + xurl = '' + xhash = '' + + for xd in xdirs: + xpath = os.path.join(cache.path, xd) + print (xpath) + if os.path.isdir(xpath): + r = cm.access({'action':'system', 'automation':'utils', 'path':xpath, 'cmd':'git rev-parse HEAD'}) + if r['return'] == 0 and r['ret'] == 0: + xhash = r['stdout'] + + r = cm.access({'action':'system', 'automation':'utils', 'path':xpath, 'cmd':'git config --get remote.origin.url'}) + if r['return'] == 0 and r['ret'] == 0: + xurl = r['stdout'] + + if xurl!='' and xhash!='': + break + + if xurl!='' and xhash!='': + # Check if doesn't exist + found = False + + for xh in xhashes: + if xh['mlcommons_git_url'] == xurl and xh['mlcommons_git_hash'] == xhash: + found = True + break + + if not found: + xhashes.append({'mlcommons_git_url': xurl, + 'mlcommons_git_hash': xhash, + 'cm_cache_tags':cache.meta['tags']}) + + md_xhashes +='* MLCommons Git {} ({})\n'.format(xurl, xhash) + + if len(xhashes)>0: + host_info['mlcommons_repos'] = xhashes + + with open ("cm-host-info.json", "w") as fp: + fp.write(json.dumps(host_info, indent=2)+'\n') + # Prepare README if "cmd" in inp: cmd = "cm run script \\\n\t"+" \\\n\t".join(inp['cmd']) @@ -246,8 +298,8 @@ def postprocess(i): readme_init+= "*Check [CM MLPerf docs](https://github.com/mlcommons/ck/tree/master/docs/mlperf) for more details.*\n\n" - readme_body = "## Host platform\n\n* OS version: {}\n* CPU version: {}\n* Python version: {}\n* MLCommons CM version: {}\n\n".format(platform.platform(), - platform.processor(), sys.version, cm.__version__) + readme_body = "## Host platform\n\n* OS version: {}\n* CPU version: {}\n* Python version: {}\n* MLCommons CM version: {}\n{}\n\n".format(platform.platform(), + platform.processor(), sys.version, cm.__version__, md_xhashes) x = repo_name if repo_hash!='': x+=' --checkout='+str(repo_hash) @@ -255,6 +307,10 @@ def postprocess(i): readme_body += "## CM Run Command\n\nSee [CM installation guide](https://github.com/mlcommons/ck/blob/master/docs/installation.md).\n\n"+ \ "```bash\npip install cmind\n\ncm rm cache -f\n\ncm pull repo {}\n\n{}\n```".format(x, xcmd) + readme_body += "\n*Note that if you want to use the [latest automation recipes](https://access.cknowledge.org/playground/?action=scripts) for MLPerf (CM scripts),\n"+ \ + " you should simply reload {} without checkout and clean CM cache as follows:*\n\n".format(repo_name) + \ + "```bash\ncm rm repo {}\ncm pull repo {}\ncm rm cache 
-f\n\n```".format(repo_name, repo_name) + extra_readme_init = '' extra_readme_body = '' if env.get('CM_MLPERF_README', '') == "yes": @@ -369,7 +425,7 @@ def postprocess(i): env['CM_MLPERF_ACCURACY_RESULTS_DIR'] = accuracy_result_dir if state.get('mlperf-inference-implementation') and state['mlperf-inference-implementation'].get('version_info'): - with open(os.path.join(output_dir, "cm_version_info.json"), "w") as f: + with open(os.path.join(output_dir, "cm-version-info.json"), "w") as f: f.write(json.dumps(state['mlperf-inference-implementation']['version_info'], indent=2)) if env.get('CM_DUMP_SYSTEM_INFO', True): diff --git a/cm-mlops/script/generate-mlperf-inference-submission/customize.py b/cm-mlops/script/generate-mlperf-inference-submission/customize.py index f84901cd2a..ddf4d7656e 100644 --- a/cm-mlops/script/generate-mlperf-inference-submission/customize.py +++ b/cm-mlops/script/generate-mlperf-inference-submission/customize.py @@ -262,7 +262,7 @@ def generate_submission(i): files.append(f) elif f == "spl.txt": files.append(f) - elif f in [ "README.md", "README-extra.md", "cm_version_info.json", "os_info.json", "cpu_info.json", "pip_freeze.json" ] and mode == "performance": + elif f in [ "README.md", "README-extra.md", "cm-version-info.json", "os_info.json", "cpu_info.json", "pip_freeze.json" ] and mode == "performance": shutil.copy(os.path.join(result_mode_path, f), os.path.join(submission_measurement_path, f)) elif f in [ "console.out" ]: shutil.copy(os.path.join(result_mode_path, f), os.path.join(submission_measurement_path, mode+"_"+f)) From 2cbf7a7b4bc022da624597e8f4e51a5cf5b70d9c Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Tue, 13 Feb 2024 13:38:43 +0000 Subject: [PATCH 31/33] Added script to dump mlperf-run-state, use cache for mlperf results, version info cleanups --- cm-mlops/automation/script/module.py | 2 + .../script/app-mlperf-inference-cpp/_cm.yaml | 3 + .../app-mlperf-inference-cpp/customize.py | 33 ---------- .../app-mlperf-inference-reference/_cm.yaml | 5 +- .../customize.py | 32 +--------- .../app-mlperf-inference-tflite-cpp/_cm.json | 6 ++ .../customize.py | 33 ---------- .../script/app-mlperf-inference/customize.py | 8 ++- .../_cm.yaml | 6 +- .../customize.py | 4 +- .../reproduce-mlperf-inference-intel/_cm.yaml | 3 + .../customize.py | 33 ---------- .../_cm.yaml | 4 +- .../customize.py | 33 ---------- .../_cm.yaml | 3 + .../customize.py | 23 ------- .../script/run-mlperf-inference-app/_cm.yaml | 6 ++ .../run-mlperf-inference-app/customize.py | 6 +- .../_cm.yaml | 13 ++++ .../customize.py | 63 +++++++++++++++++++ 20 files changed, 124 insertions(+), 195 deletions(-) create mode 100644 cm-mlops/script/save-mlperf-inference-implementation-state/_cm.yaml create mode 100644 cm-mlops/script/save-mlperf-inference-implementation-state/customize.py diff --git a/cm-mlops/automation/script/module.py b/cm-mlops/automation/script/module.py index 0f83a775c1..0a51cff7eb 100644 --- a/cm-mlops/automation/script/module.py +++ b/cm-mlops/automation/script/module.py @@ -1316,6 +1316,8 @@ def run(self, i): utils.merge_dicts({'dict1':env, 'dict2':const, 'append_lists':True, 'append_unique':True}) utils.merge_dicts({'dict1':state, 'dict2':const_state, 'append_lists':True, 'append_unique':True}) + run_script_input['run_state'] = run_state + ii = copy.deepcopy(customize_common_input) ii['env'] = env ii['state'] = state diff --git a/cm-mlops/script/app-mlperf-inference-cpp/_cm.yaml b/cm-mlops/script/app-mlperf-inference-cpp/_cm.yaml index b1cd539b0c..81348987e0 100644 --- 
a/cm-mlops/script/app-mlperf-inference-cpp/_cm.yaml +++ b/cm-mlops/script/app-mlperf-inference-cpp/_cm.yaml @@ -151,6 +151,9 @@ post_deps: CM_MLPERF_SKIP_RUN: - "yes" + - tags: save,mlperf,inference,state + names: + - save-mlperf-inference-state # Variations to customize dependencies variations: diff --git a/cm-mlops/script/app-mlperf-inference-cpp/customize.py b/cm-mlops/script/app-mlperf-inference-cpp/customize.py index ddbb6a2fab..bd4910b26d 100644 --- a/cm-mlops/script/app-mlperf-inference-cpp/customize.py +++ b/cm-mlops/script/app-mlperf-inference-cpp/customize.py @@ -84,37 +84,4 @@ def postprocess(i): env = i['env'] state = i['state'] - if env.get('CM_MLPERF_README', '') == "yes": - import cmind as cm - inp = i['input'] - script_tags = inp['tags'] - script_adr = inp.get('add_deps_recursive', inp.get('adr', {})) - - cm_input = {'action': 'run', - 'automation': 'script', - 'tags': script_tags, - 'adr': script_adr, - 'print_deps': True, - 'env': env, - 'quiet': True, - 'silent': True, - 'fake_run': True - } - r = cm.access(cm_input) - if r['return'] > 0: - return r - - state['mlperf-inference-implementation'] = {} - state['mlperf-inference-implementation']['print_deps'] = r['new_state']['print_deps'] - - if env.get('CM_DUMP_VERSION_INFO', True): - if not state.get('mlperf-inference-implementation', {}): - state['mlperf-inference-implementation'] = {} - run_state = i['run_script_input']['run_state'] - state['mlperf-inference-implementation'][run_state['script_uid']] = {} - version_info = {} - version_info[run_state['script_uid']] = run_state['version_info'] - - state['mlperf-inference-implementation']['version_info'] = version_info - return {'return':0} diff --git a/cm-mlops/script/app-mlperf-inference-reference/_cm.yaml b/cm-mlops/script/app-mlperf-inference-reference/_cm.yaml index 31dc6ed7ea..0e617ad49f 100644 --- a/cm-mlops/script/app-mlperf-inference-reference/_cm.yaml +++ b/cm-mlops/script/app-mlperf-inference-reference/_cm.yaml @@ -574,7 +574,10 @@ posthook_deps: CM_MLPERF_SKIP_RUN: - "on" - +post_deps: + - tags: save,mlperf,inference,state + names: + - save-mlperf-inference-state # Variations to customize dependencies variations: diff --git a/cm-mlops/script/app-mlperf-inference-reference/customize.py b/cm-mlops/script/app-mlperf-inference-reference/customize.py index c5bf54ede3..f425411d77 100644 --- a/cm-mlops/script/app-mlperf-inference-reference/customize.py +++ b/cm-mlops/script/app-mlperf-inference-reference/customize.py @@ -363,36 +363,6 @@ def postprocess(i): env = i['env'] state = i['state'] - if env.get('CM_MLPERF_README', "") == "yes": - import cmind as cm - inp = i['input'] - script_tags = inp['tags'] - script_adr = inp.get('add_deps_recursive', inp.get('adr', {})) - - cm_input = {'action': 'run', - 'automation': 'script', - 'tags': script_tags, - 'adr': script_adr, - 'env': env, - 'print_deps': True, - 'quiet': True, - 'silent': True, - 'fake_run': True - } - r = cm.access(cm_input) - if r['return'] > 0: - return r - - state['mlperf-inference-implementation'] = {} - state['mlperf-inference-implementation']['print_deps'] = r['new_state']['print_deps'] - - if env.get('CM_DUMP_VERSION_INFO', True): - if not state.get('mlperf-inference-implementation', {}): - state['mlperf-inference-implementation'] = {} - run_state = i['run_script_input']['run_state'] - version_info = {} - version_info[run_state['script_id']+":"+",".join(run_state['script_variation_tags'])] = run_state['version_info'] - - state['mlperf-inference-implementation']['version_info'] = version_info + inp = 
i['input'] return {'return':0} diff --git a/cm-mlops/script/app-mlperf-inference-tflite-cpp/_cm.json b/cm-mlops/script/app-mlperf-inference-tflite-cpp/_cm.json index d43c46d724..318900b7dd 100644 --- a/cm-mlops/script/app-mlperf-inference-tflite-cpp/_cm.json +++ b/cm-mlops/script/app-mlperf-inference-tflite-cpp/_cm.json @@ -252,6 +252,12 @@ "yes" ] } + }, + { + "tags": "save,mlperf,inference,state", + "names": [ + "save-mlperf-inference-state" + ] } ], "tags": [ diff --git a/cm-mlops/script/app-mlperf-inference-tflite-cpp/customize.py b/cm-mlops/script/app-mlperf-inference-tflite-cpp/customize.py index dcfcd4c964..ebd588c9f2 100644 --- a/cm-mlops/script/app-mlperf-inference-tflite-cpp/customize.py +++ b/cm-mlops/script/app-mlperf-inference-tflite-cpp/customize.py @@ -93,37 +93,4 @@ def postprocess(i): env = i['env'] state = i['state'] - if env.get('CM_MLPERF_README', '') == "yes": - import cmind as cm - inp = i['input'] - script_tags = inp['tags'] - script_adr = inp.get('add_deps_recursive', inp.get('adr', {})) - - cm_input = {'action': 'run', - 'automation': 'script', - 'tags': script_tags, - 'adr': script_adr, - 'env': env, - 'print_deps': True, - 'quiet': True, - 'silent': True, - 'fake_run': True - } - r = cm.access(cm_input) - if r['return'] > 0: - return r - - state['mlperf-inference-implementation'] = {} - state['mlperf-inference-implementation']['print_deps'] = r['new_state']['print_deps'] - - if env.get('CM_DUMP_VERSION_INFO', True): - if not state.get('mlperf-inference-implementation', {}): - state['mlperf-inference-implementation'] = {} - run_state = i['run_script_input']['run_state'] - state['mlperf-inference-implementation'][run_state['script_uid']] = {} - version_info = {} - version_info[run_state['script_uid']] = run_state['version_info'] - - state['mlperf-inference-implementation']['version_info'] = version_info - return {'return':0} diff --git a/cm-mlops/script/app-mlperf-inference/customize.py b/cm-mlops/script/app-mlperf-inference/customize.py index 8a4000899f..d716e1d94f 100644 --- a/cm-mlops/script/app-mlperf-inference/customize.py +++ b/cm-mlops/script/app-mlperf-inference/customize.py @@ -12,6 +12,7 @@ def preprocess(i): env = i['env'] + state = i['state'] if env.get('CM_MLPERF_IMPLEMENTATION', '') == 'nvidia-original': if env.get('CM_NVIDIA_GPU_NAME', '') in [ "rtx_4090", "a100", "t4", "l4", "orin", "custom" ]: @@ -24,7 +25,12 @@ def preprocess(i): env['CM_NVIDIA_HARNESS_GPU_VARIATION'] = '' if 'cmd' in i['input']: - i['state']['mlperf_inference_run_cmd'] = "cm run script " + " ".join(i['input']['cmd']) + state['mlperf_inference_run_cmd'] = "cm run script " + " ".join(i['input']['cmd']) + + state['mlperf-inference-implementation'] = {} + + run_state = i['run_script_input']['run_state'] + state['mlperf-inference-implementation']['script_id'] = run_state['script_id']+":"+",".join(run_state['script_variation_tags']) return {'return':0} diff --git a/cm-mlops/script/generate-mlperf-inference-user-conf/_cm.yaml b/cm-mlops/script/generate-mlperf-inference-user-conf/_cm.yaml index abc43e48b6..9d19ad8fc9 100644 --- a/cm-mlops/script/generate-mlperf-inference-user-conf/_cm.yaml +++ b/cm-mlops/script/generate-mlperf-inference-user-conf/_cm.yaml @@ -73,7 +73,11 @@ deps: - python - python3 - + - tags: get,mlperf,results,dir + names: + - get-mlperf-results-dir + skip_if_env: + OUTPUT_BASE_DIR: [ on ] ######################################################################## # Install MLPerf inference dependencies diff --git 
a/cm-mlops/script/generate-mlperf-inference-user-conf/customize.py b/cm-mlops/script/generate-mlperf-inference-user-conf/customize.py index 7ad53e9cc3..38fca24330 100644 --- a/cm-mlops/script/generate-mlperf-inference-user-conf/customize.py +++ b/cm-mlops/script/generate-mlperf-inference-user-conf/customize.py @@ -35,8 +35,8 @@ def preprocess(i): env['CM_MLPERF_LOADGEN_MODE'] = "accuracy" - if 'OUTPUT_BASE_DIR' not in env: - env['OUTPUT_BASE_DIR'] = os.getcwd() + if env.get('OUTPUT_BASE_DIR', '') == '': + env['OUTPUT_BASE_DIR'] = env.get('CM_MLPERF_INFERENCE_RESULTS_DIR', os.getcwd()) if 'CM_NUM_THREADS' not in env: if 'CM_MINIMIZE_THREADS' in env: diff --git a/cm-mlops/script/reproduce-mlperf-inference-intel/_cm.yaml b/cm-mlops/script/reproduce-mlperf-inference-intel/_cm.yaml index 7658e20e6b..d7916af24b 100644 --- a/cm-mlops/script/reproduce-mlperf-inference-intel/_cm.yaml +++ b/cm-mlops/script/reproduce-mlperf-inference-intel/_cm.yaml @@ -160,6 +160,9 @@ post_deps: - run_harness tags: benchmark-mlperf + - tags: save,mlperf,inference,state + names: + - save-mlperf-inference-state # Variations to customize dependencies variations: diff --git a/cm-mlops/script/reproduce-mlperf-inference-intel/customize.py b/cm-mlops/script/reproduce-mlperf-inference-intel/customize.py index 8d5c16ca74..a924508970 100644 --- a/cm-mlops/script/reproduce-mlperf-inference-intel/customize.py +++ b/cm-mlops/script/reproduce-mlperf-inference-intel/customize.py @@ -94,37 +94,4 @@ def postprocess(i): env = i['env'] state = i['state'] - if env.get('CM_MLPERF_README', '') == "yes": - import cmind as cm - inp = i['input'] - script_tags = inp['tags'] - script_adr = inp.get('add_deps_recursive', inp.get('adr', {})) - - cm_input = {'action': 'run', - 'automation': 'script', - 'tags': script_tags, - 'adr': script_adr, - 'print_deps': True, - 'env': env, - 'quiet': True, - 'silent': True, - 'fake_run': True - } - r = cm.access(cm_input) - if r['return'] > 0: - return r - - state['mlperf-inference-implementation'] = {} - state['mlperf-inference-implementation']['print_deps'] = r['new_state']['print_deps'] - - if env.get('CM_DUMP_VERSION_INFO', True): - if not state.get('mlperf-inference-implementation', {}): - state['mlperf-inference-implementation'] = {} - run_state = i['run_script_input']['run_state'] - state['mlperf-inference-implementation'][run_state['script_uid']] = {} - version_info = {} - version_info[run_state['script_uid']] = run_state['version_info'] - - state['mlperf-inference-implementation']['version_info'] = version_info - return {'return':0} diff --git a/cm-mlops/script/reproduce-mlperf-inference-nvidia/_cm.yaml b/cm-mlops/script/reproduce-mlperf-inference-nvidia/_cm.yaml index 28ffbf5f4d..9ebae73d23 100644 --- a/cm-mlops/script/reproduce-mlperf-inference-nvidia/_cm.yaml +++ b/cm-mlops/script/reproduce-mlperf-inference-nvidia/_cm.yaml @@ -291,7 +291,9 @@ post_deps: enable_if_env: CM_CALL_MLPERF_RUNNER: - yes - + - tags: save,mlperf,inference,state + names: + - save-mlperf-inference-state # Variations to customize dependencies variations: diff --git a/cm-mlops/script/reproduce-mlperf-inference-nvidia/customize.py b/cm-mlops/script/reproduce-mlperf-inference-nvidia/customize.py index 633dfc27e9..ddc8ca961d 100644 --- a/cm-mlops/script/reproduce-mlperf-inference-nvidia/customize.py +++ b/cm-mlops/script/reproduce-mlperf-inference-nvidia/customize.py @@ -418,37 +418,4 @@ def postprocess(i): env = i['env'] state = i['state'] - if env.get('CM_MLPERF_README', '') == "yes": - import cmind as cm - inp = i['input'] - 
script_tags = inp['tags'] - script_adr = inp.get('add_deps_recursive', inp.get('adr', {})) - - cm_input = {'action': 'run', - 'automation': 'script', - 'tags': script_tags, - 'adr': script_adr, - 'env': env, - 'print_deps': True, - 'quiet': True, - 'silent': True, - 'fake_run': True - } - r = cm.access(cm_input) - if r['return'] > 0: - return r - - state['mlperf-inference-implementation'] = {} - state['mlperf-inference-implementation']['print_deps'] = r['new_state']['print_deps'] - - if env.get('CM_DUMP_VERSION_INFO', True): - if not state.get('mlperf-inference-implementation', {}): - state['mlperf-inference-implementation'] = {} - run_state = i['run_script_input']['run_state'] - state['mlperf-inference-implementation'][run_state['script_uid']] = {} - version_info = {} - version_info[run_state['script_uid']] = run_state['version_info'] - - state['mlperf-inference-implementation']['version_info'] = version_info - return {'return':0} diff --git a/cm-mlops/script/reproduce-mlperf-inference-qualcomm/_cm.yaml b/cm-mlops/script/reproduce-mlperf-inference-qualcomm/_cm.yaml index 601e4e0dc2..1b1b0b73ea 100644 --- a/cm-mlops/script/reproduce-mlperf-inference-qualcomm/_cm.yaml +++ b/cm-mlops/script/reproduce-mlperf-inference-qualcomm/_cm.yaml @@ -261,6 +261,9 @@ post_deps: - yes tags: benchmark-mlperf + - tags: save,mlperf,inference,state + names: + - save-mlperf-inference-state # Variations to customize dependencies variations: diff --git a/cm-mlops/script/reproduce-mlperf-inference-qualcomm/customize.py b/cm-mlops/script/reproduce-mlperf-inference-qualcomm/customize.py index ae7cc0e40a..d24804eb3a 100644 --- a/cm-mlops/script/reproduce-mlperf-inference-qualcomm/customize.py +++ b/cm-mlops/script/reproduce-mlperf-inference-qualcomm/customize.py @@ -183,28 +183,5 @@ def preprocess(i): def postprocess(i): env = i['env'] - if env.get('CM_MLPERF_README', '') == "yes": - import cmind as cm - inp = i['input'] - state = i['state'] - script_tags = inp['tags'] - script_adr = inp.get('add_deps_recursive', inp.get('adr', {})) - - cm_input = {'action': 'run', - 'automation': 'script', - 'tags': script_tags, - 'adr': script_adr, - 'print_deps': True, - 'env': env, - 'quiet': True, - 'silent': True, - 'fake_run': True - } - r = cm.access(cm_input) - if r['return'] > 0: - return r - - state['mlperf-inference-implementation'] = {} - state['mlperf-inference-implementation']['print_deps'] = r['new_state']['print_deps'] return {'return':0} diff --git a/cm-mlops/script/run-mlperf-inference-app/_cm.yaml b/cm-mlops/script/run-mlperf-inference-app/_cm.yaml index 6876897659..2d1771243f 100644 --- a/cm-mlops/script/run-mlperf-inference-app/_cm.yaml +++ b/cm-mlops/script/run-mlperf-inference-app/_cm.yaml @@ -103,6 +103,12 @@ deps: tags: get,mlcommons,inference,src - tags: get,sut,description +- tags: get,mlperf,inference,results,dir + names: + - get-mlperf-inference-results-dir + skip_if_env: + OUTPUT_BASE_DIR: [ on ] + docker: fake_run_deps: true mounts: diff --git a/cm-mlops/script/run-mlperf-inference-app/customize.py b/cm-mlops/script/run-mlperf-inference-app/customize.py index a7f6e54c17..72f0d11e10 100644 --- a/cm-mlops/script/run-mlperf-inference-app/customize.py +++ b/cm-mlops/script/run-mlperf-inference-app/customize.py @@ -92,9 +92,8 @@ def preprocess(i): else: env['CM_MLPERF_LOADGEN_MODES'] = [ env['CM_MLPERF_LOADGEN_MODE'] ] - - if 'OUTPUT_BASE_DIR' not in env: - env['OUTPUT_BASE_DIR'] = os.getcwd() + if env.get('OUTPUT_BASE_DIR', '') == '': + env['OUTPUT_BASE_DIR'] = env.get('CM_MLPERF_INFERENCE_RESULTS_DIR', 
os.getcwd()) test_list = ["TEST01", "TEST05"] if env['CM_MODEL'] in ["resnet50"]: @@ -139,6 +138,7 @@ def preprocess(i): print ('=========================================================') local_keys = [ 'CM_MLPERF_SKIP_RUN', 'CM_MLPERF_LOADGEN_QUERY_COUNT' ] + for scenario in env['CM_MLPERF_LOADGEN_SCENARIOS']: scenario_tags = tags + ",_"+scenario.lower() env['CM_MLPERF_LOADGEN_SCENARIO'] = scenario diff --git a/cm-mlops/script/save-mlperf-inference-implementation-state/_cm.yaml b/cm-mlops/script/save-mlperf-inference-implementation-state/_cm.yaml new file mode 100644 index 0000000000..4f1deee8e2 --- /dev/null +++ b/cm-mlops/script/save-mlperf-inference-implementation-state/_cm.yaml @@ -0,0 +1,13 @@ +alias: save-mlperf-inference-implementation-state +automation_alias: script +automation_uid: 5b4e0237da074764 +cache: false +new_state_keys: + - mlperf-inference-implementation +tags: +- save +- mlperf +- inference +- implementation +- state +uid: b14b813229c444f8 diff --git a/cm-mlops/script/save-mlperf-inference-implementation-state/customize.py b/cm-mlops/script/save-mlperf-inference-implementation-state/customize.py new file mode 100644 index 0000000000..be3be96798 --- /dev/null +++ b/cm-mlops/script/save-mlperf-inference-implementation-state/customize.py @@ -0,0 +1,63 @@ +from cmind import utils +import os + +def preprocess(i): + + os_info = i['os_info'] + + env = i['env'] + state = i['state'] + + meta = i['meta'] + + automation = i['automation'] + + quiet = (env.get('CM_QUIET', False) == 'yes') + + if not state.get('mlperf-inference-implementation'): #No state information. Just returning + return {'return': 0} + + if env.get('CM_MLPERF_README', "") == "yes": + import cmind as cm + inp = i['input'] + + script_tags = state['mlperf-inference-implementation'].get('script_tags', '') + script_adr = state['mlperf-inference-implementation'].get('script_adr', {}) + + if script_tags != '': + cm_input = {'action': 'run', + 'automation': 'script', + 'tags': script_tags, + 'adr': script_adr, + 'env': env, + 'print_deps': True, + 'quiet': True, + 'silent': True, + 'fake_run': True + } + + r = cm.access(cm_input) + if r['return'] > 0: + return r + + state['mlperf-inference-implementation']['print_deps'] = r['new_state']['print_deps'] + + if env.get('CM_DUMP_VERSION_INFO', True): + + if state['mlperf-inference-implementation'].get('script_id', '') == '': + state['mlperf-inference-implementation']['script_id'] = '' + + script_id = state['mlperf-inference-implementation']['script_id'] + run_state = i['input']['run_state'] + version_info = {} + version_info[script_id] = run_state['version_info'] + + state['mlperf-inference-implementation']['version_info'] = version_info + + return {'return':0} + +def postprocess(i): + + env = i['env'] + + return {'return':0} From 9a1f0569312685463c351c6cfd9f0442e76b3717 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Tue, 13 Feb 2024 13:56:52 +0000 Subject: [PATCH 32/33] Support git hash export for mlperf inference and power-dev repos --- cm-mlops/script/get-git-repo/customize.py | 5 +++++ cm-mlops/script/get-git-repo/run.sh | 19 ++++++++++++------- .../get-mlperf-inference-src/customize.py | 3 +++ .../script/get-mlperf-power-dev/customize.py | 6 ++++++ 4 files changed, 26 insertions(+), 7 deletions(-) diff --git a/cm-mlops/script/get-git-repo/customize.py b/cm-mlops/script/get-git-repo/customize.py index 044b884740..4b9d7863b7 100644 --- a/cm-mlops/script/get-git-repo/customize.py +++ b/cm-mlops/script/get-git-repo/customize.py @@ -59,6 +59,11 @@ def postprocess(i): 

     env['CM_GET_DEPENDENT_CACHED_PATH'] = git_checkout_path

+    if os.path.exists("tmp-cm-git-hash.out"):
+        with open("tmp-cm-git-hash.out", "r") as f:
+            git_hash = f.readline().strip()
+            env['CM_GIT_REPO_CURRENT_HASH'] = git_hash
+
     return {'return':0}

 def get_env_key(env):
diff --git a/cm-mlops/script/get-git-repo/run.sh b/cm-mlops/script/get-git-repo/run.sh
index 60fc0336b8..cf3c125d39 100644
--- a/cm-mlops/script/get-git-repo/run.sh
+++ b/cm-mlops/script/get-git-repo/run.sh
@@ -11,7 +11,7 @@ if [ ! -d "${CM_TMP_GIT_PATH}" ]; then

   echo "${CM_GIT_CLONE_CMD}";
   ${CM_GIT_CLONE_CMD}
-  if [ "${?}" != "0" ]; then exit 1; fi
+  ret=$?; if [ "${ret}" != "0" ]; then exit ${ret}; fi

   cd ${folder}

@@ -21,10 +21,9 @@ if [ ! -d "${CM_TMP_GIT_PATH}" ]; then
     cmd="git checkout -b ${CM_GIT_SHA} ${CM_GIT_SHA}"
     echo "$cmd"
     eval "$cmd"
-  fi
-  if [ "${?}" != "0" ]; then exit 1; fi
+    ret=$?; if [ "${ret}" != "0" ]; then exit ${ret}; fi

-  if [ ! -z ${CM_GIT_CHECKOUT_TAG} ]; then
+  elif [ ! -z ${CM_GIT_CHECKOUT_TAG} ]; then
     echo ""
     cmd="git fetch --all --tags"
     echo "$cmd"
@@ -33,8 +32,14 @@ if [ ! -d "${CM_TMP_GIT_PATH}" ]; then
     cmd="git checkout tags/${CM_GIT_CHECKOUT_TAG} -b ${CM_GIT_CHECKOUT_TAG}"
     echo "$cmd"
     eval "$cmd"
+    ret=$?; if [ "${ret}" != "0" ]; then exit ${ret}; fi
+
+  else
+    cmd="git rev-parse HEAD > ../tmp-cm-git-hash.out"
+    echo "$cmd"
+    eval "$cmd"
+    ret=$?; if [ "${ret}" != "0" ]; then exit ${ret}; fi
   fi
-  if [ "${?}" != "0" ]; then exit 1; fi

 else
   cd ${folder}
@@ -47,7 +52,7 @@ for submodule in "${submodules[@]}"
 do
   echo "Initializing submodule ${submodule}"
   git submodule update --init "${submodule}"
-  if [ "${?}" != "0" ]; then exit 1; fi
+  ret=$?; if [ "${ret}" != "0" ]; then exit ${ret}; fi
 done

 if [ ${CM_GIT_PATCH} == "yes" ]; then
@@ -56,7 +61,7 @@ if [ ${CM_GIT_PATCH} == "yes" ]; then
   do
     echo "Applying patch $patch_file"
     git apply "$patch_file"
-    if [ "${?}" != "0" ]; then exit 1; fi
+    ret=$?; if [ "${ret}" != "0" ]; then exit ${ret}; fi
   done
 fi
 cd "$CUR_DIR"
diff --git a/cm-mlops/script/get-mlperf-inference-src/customize.py b/cm-mlops/script/get-mlperf-inference-src/customize.py
index 80e28002da..8df8c3ed88 100644
--- a/cm-mlops/script/get-mlperf-inference-src/customize.py
+++ b/cm-mlops/script/get-mlperf-inference-src/customize.py
@@ -79,6 +79,9 @@ def postprocess(i):

     state['CM_MLPERF_INFERENCE_MODELS'] = valid_models

+    if env.get('CM_GIT_REPO_CURRENT_HASH', '') != '':
+        env['CM_VERSION'] += "-git-"+env['CM_GIT_REPO_CURRENT_HASH']
+
     return {'return':0}


diff --git a/cm-mlops/script/get-mlperf-power-dev/customize.py b/cm-mlops/script/get-mlperf-power-dev/customize.py
index a5d4a2ebcb..2af085d740 100644
--- a/cm-mlops/script/get-mlperf-power-dev/customize.py
+++ b/cm-mlops/script/get-mlperf-power-dev/customize.py
@@ -11,5 +11,11 @@ def preprocess(i):

 def postprocess(i):
+    env = i['env']

+    if env.get('CM_VERSION', '') == '':
+        env['CM_VERSION'] = "master"
+
+    if env.get('CM_GIT_REPO_CURRENT_HASH', '') != '':
+        env['CM_VERSION'] += "-git-"+env['CM_GIT_REPO_CURRENT_HASH']

     return {'return':0}

From 0c5ff5b5c84d6cac0e4ee7f60903bf0e0113c455 Mon Sep 17 00:00:00 2001
From: Arjun Suresh
Date: Tue, 13 Feb 2024 13:58:42 +0000
Subject: [PATCH 33/33] Removed stale run files

---
 .../get-mlperf-inference-results-dir/run.bat |  1 -
 .../get-mlperf-inference-results-dir/run.sh  | 32 ------------------
 .../run.bat                                  |  1 -
 .../run.sh                                   | 32 ------------------
 4 files changed, 66 deletions(-)
 delete mode 100644 cm-mlops/script/get-mlperf-inference-results-dir/run.bat
 delete mode 100644 cm-mlops/script/get-mlperf-inference-results-dir/run.sh
 delete mode 100644 cm-mlops/script/get-mlperf-inference-submission-dir/run.bat
 delete mode 100644 cm-mlops/script/get-mlperf-inference-submission-dir/run.sh

diff --git a/cm-mlops/script/get-mlperf-inference-results-dir/run.bat b/cm-mlops/script/get-mlperf-inference-results-dir/run.bat
deleted file mode 100644
index 648302ca71..0000000000
--- a/cm-mlops/script/get-mlperf-inference-results-dir/run.bat
+++ /dev/null
@@ -1 +0,0 @@
-rem native script
diff --git a/cm-mlops/script/get-mlperf-inference-results-dir/run.sh b/cm-mlops/script/get-mlperf-inference-results-dir/run.sh
deleted file mode 100644
index eb5ce24565..0000000000
--- a/cm-mlops/script/get-mlperf-inference-results-dir/run.sh
+++ /dev/null
@@ -1,32 +0,0 @@
-#!/bin/bash
-
-#CM Script location: ${CM_TMP_CURRENT_SCRIPT_PATH}
-
-#To export any variable
-#echo "VARIABLE_NAME=VARIABLE_VALUE" >>tmp-run-env.out
-
-#${CM_PYTHON_BIN_WITH_PATH} contains the path to python binary if "get,python" is added as a dependency
-
-
-
-function exit_if_error() {
-  test $? -eq 0 || exit $?
-}
-
-function run() {
-  echo "Running: "
-  echo "$1"
-  echo ""
-  if [[ ${CM_FAKE_RUN} != 'yes' ]]; then
-    eval "$1"
-    exit_if_error
-  fi
-}
-
-#Add your run commands here...
-# run "$CM_RUN_CMD"
-
-scratch_path=${CM_NVIDIA_MLPERF_SCRATCH_PATH}
-mkdir -p ${scratch_path}/data
-mkdir -p ${scratch_path}/preprocessed_data
-mkdir -p ${scratch_path}/models
diff --git a/cm-mlops/script/get-mlperf-inference-submission-dir/run.bat b/cm-mlops/script/get-mlperf-inference-submission-dir/run.bat
deleted file mode 100644
index 648302ca71..0000000000
--- a/cm-mlops/script/get-mlperf-inference-submission-dir/run.bat
+++ /dev/null
@@ -1 +0,0 @@
-rem native script
diff --git a/cm-mlops/script/get-mlperf-inference-submission-dir/run.sh b/cm-mlops/script/get-mlperf-inference-submission-dir/run.sh
deleted file mode 100644
index eb5ce24565..0000000000
--- a/cm-mlops/script/get-mlperf-inference-submission-dir/run.sh
+++ /dev/null
@@ -1,32 +0,0 @@
-#!/bin/bash
-
-#CM Script location: ${CM_TMP_CURRENT_SCRIPT_PATH}
-
-#To export any variable
-#echo "VARIABLE_NAME=VARIABLE_VALUE" >>tmp-run-env.out
-
-#${CM_PYTHON_BIN_WITH_PATH} contains the path to python binary if "get,python" is added as a dependency
-
-
-
-function exit_if_error() {
-  test $? -eq 0 || exit $?
-}
-
-function run() {
-  echo "Running: "
-  echo "$1"
-  echo ""
-  if [[ ${CM_FAKE_RUN} != 'yes' ]]; then
-    eval "$1"
-    exit_if_error
-  fi
-}
-
-#Add your run commands here...
-# run "$CM_RUN_CMD"
-
-scratch_path=${CM_NVIDIA_MLPERF_SCRATCH_PATH}
-mkdir -p ${scratch_path}/data
-mkdir -p ${scratch_path}/preprocessed_data
-mkdir -p ${scratch_path}/models
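
For reference, the save-mlperf-inference-implementation-state script introduced in PATCH 31 is resolved through its tags, exactly like the post_deps entry added to reproduce-mlperf-inference-qualcomm/_cm.yaml. A minimal sketch of driving it by hand through the cmind Python API follows; the standalone invocation is an assumption for illustration only, since in a real run the CM script automation injects the 'env'/'state' dictionaries (including the mlperf-inference-implementation state block the script reads) rather than the caller.

import cmind as cm

# Illustrative sketch: resolve the new state-saving script by its tags,
# mirroring the post_deps entry added in PATCH 31. Without a populated
# 'mlperf-inference-implementation' state block the script returns early.
r = cm.access({'action': 'run',
               'automation': 'script',
               'tags': 'save,mlperf,inference,implementation,state',
               'quiet': True})
if r['return'] > 0:
    raise Exception(r.get('error', 'CM script run failed'))

After PATCH 32, the same flow applied to get,mlperf,inference,src or get,mlperf,power-dev is expected to report a CM_VERSION of the form master-git-<hash>, since the exported CM_GIT_REPO_CURRENT_HASH is appended to the version string.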