various improvements to support MLPerf inference v4.0 (#1116)

mlcommons · Feb 20, 2024 · c922612 · c922612
2 parents 144c512 + 3d08c07
commit c922612
Show file tree

Hide file tree

Showing 74 changed files with 1,776 additions and 114 deletions.
diff --git a/cm-mlops/automation/script/module.py b/cm-mlops/automation/script/module.py
@@ -4161,7 +4161,7 @@ def prepare_and_run_script_with_postprocessing(i, postprocess="postprocess"):
 please report this issue with a full log at "https://github.com/mlcommons/ck". 
 The CM concept is to collaboratively fix such issues inside portable CM scripts 
 to make existing tools and native scripts more portable, interoperable 
-and deterministic. Thank you!'''
+and deterministic. Thank you'''
 
             return {'return':2, 'error':'Portable CM script failed (name = {}, return code = {})\n\n{}'.format(meta['alias'], rc, note)}
 

diff --git a/cm-mlops/cfg/benchmark-list/mlperf-inference.yaml b/cm-mlops/cfg/benchmark-list/mlperf-inference.yaml
@@ -11,7 +11,7 @@ urls:
   url: "https://github.com/mlcommons/inference"
 - name: "ArXiv paper"
   url: "https://arxiv.org/abs/1911.02549"
-- name: "MLCommons CM automation (under development)"
+- name: "MLCommons CM automation for MLPerf inference"
   url: "https://github.com/mlcommons/ck/tree/master/docs/mlperf/inference"
 
 script_name: run-mlperf-inference-app,4a5d5b13fd7e4ac8

diff --git a/cm-mlops/cfg/benchmark-run-mlperf-inference-v3.1/_cm.yaml b/cm-mlops/cfg/benchmark-run-mlperf-inference-v3.1/_cm.yaml
@@ -28,11 +28,18 @@ view_dimensions:
   - "MLPerf backend"
 - - input.model
   - "MLPerf model"
+- - input.precision
+  - "Model precision"
 - - input.scenario
   - "MLPerf scenario"
 - - input.host_os
   - "Host OS"
 - - output.state.cm-mlperf-inference-results-last.performance
   - "Got performance"
+  - "tick"
 - - output.state.cm-mlperf-inference-results-last.accuracy
   - "Got accuracy"
+  - "tick"
+- - output.state.cm-mlperf-inference-results-last.power
+  - "Got energy"
+  - "tick"
diff --git a/cm-mlops/cfg/benchmark-run-mlperf-inference-v3.1/run-005147815bf840b8-input.json b/cm-mlops/cfg/benchmark-run-mlperf-inference-v3.1/run-005147815bf840b8-input.json
@@ -0,0 +1,54 @@
+{
+  "action": "run",
+  "automation": "script",
+  "tags": "run-mlperf-inference,_r4.0,_performance-only,_short",
+  "division": "open",
+  "category": "datacenter",
+  "device": "qaic",
+  "model": "bert-99.9",
+  "precision": "float16",
+  "implementation": "qualcomm",
+  "backend": "glow",
+  "scenario": "Offline",
+  "execution_mode": "test",
+  "power": "no",
+  "adr": {
+    "python": {
+      "version_min": "3.8"
+    }
+  },
+  "clean": true,
+  "compliance": "no",
+  "j": true,
+  "jf": "run-005147815bf840b8",
+  "quiet": true,
+  "time": true,
+  "host_os": "linux",
+  "cmd": [
+    "--tags=run-mlperf-inference,_r4.0,_performance-only,_short",
+    "--division=open",
+    "--category=datacenter",
+    "--device=qaic",
+    "--model=bert-99.9",
+    "--precision=float16",
+    "--implementation=qualcomm",
+    "--backend=glow",
+    "--scenario=Offline",
+    "--execution_mode=test",
+    "--power=no",
+    "--adr.python.version_min=3.8",
+    "--clean",
+    "--compliance=no",
+    "--j",
+    "--quiet",
+    "--time",
+    "--host_os=linux"
+  ],
+  "out": "con",
+  "parsed_automation": [
+    [
+      "script",
+      "5b4e0237da074764"
+    ]
+  ]
+}
diff --git a/cm-mlops/cfg/benchmark-run-mlperf-inference-v3.1/run-005147815bf840b8-meta.json b/cm-mlops/cfg/benchmark-run-mlperf-inference-v3.1/run-005147815bf840b8-meta.json
@@ -0,0 +1,9 @@
+{
+  "uid": "800fe1b33ca443da",
+  "compute_uid": "d2ae645066664463",
+  "bench_uid": "39877bb63fb54725",
+  "date_time": "2024-02-20T15:25:03.786139",
+  "functional": true,
+  "reproduced": true,
+  "support_docker": true
+}
diff --git a/cm-mlops/cfg/benchmark-run-mlperf-inference-v3.1/run-005147815bf840b8-output.json b/cm-mlops/cfg/benchmark-run-mlperf-inference-v3.1/run-005147815bf840b8-output.json
@@ -0,0 +1,11 @@
+{
+  "return": 0,
+  "env": {},
+  "new_env": {},
+  "state": {
+    "cm-mlperf-inference-results-last": {
+      "performance": "tested-will-be-added-in-v4.0",
+      "performance_valid": true
+    }
+  }
+}
diff --git a/cm-mlops/cfg/benchmark-run-mlperf-inference-v3.1/run-0eeb9799b12b488f-input.json b/cm-mlops/cfg/benchmark-run-mlperf-inference-v3.1/run-0eeb9799b12b488f-input.json
@@ -0,0 +1,55 @@
+{
+  "action": "run",
+  "automation": "script",
+  "tags": "run-mlperf-inference,_r4.0,_performance-only,_short",
+  "division": "open",
+  "category": "edge",
+  "device": "cpu",
+  "model": "bert-99",
+  "precision": "int8",
+  "implementation": "reference",
+  "backend": "deepsparse",
+  "scenario": "Offline",
+  "execution_mode": "test",
+  "power": "no",
+  "adr": {
+    "python": {
+      "version_min": "3.8"
+    }
+  },
+  "clean": true,
+  "compliance": "no",
+  "j": true,
+  "jf": "run-0eeb9799b12b488f",
+  "quiet": true,
+  "time": true,
+  "host_os": "linux",
+  "cmd": [
+    "--tags=run-mlperf-inference,_r4.0,_performance-only,_short",
+    "--division=open",
+    "--category=edge",
+    "--device=cpu",
+    "--model=bert-99",
+    "--precision=int8",
+    "--implementation=reference",
+    "--backend=deepsparse",
+    "--scenario=Offline",
+    "--execution_mode=test",
+    "--power=no",
+    "--adr.python.version_min=3.8",
+    "--clean",
+    "--compliance=no",
+    "--j",
+    "--jf=run-0eeb9799b12b488f",
+    "--quiet",
+    "--time",
+    "--host_os=linux"
+  ],
+  "out": "con",
+  "parsed_automation": [
+    [
+      "script",
+      "5b4e0237da074764"
+    ]
+  ]
+}
diff --git a/cm-mlops/cfg/benchmark-run-mlperf-inference-v3.1/run-0eeb9799b12b488f-meta.json b/cm-mlops/cfg/benchmark-run-mlperf-inference-v3.1/run-0eeb9799b12b488f-meta.json
@@ -0,0 +1,9 @@
+{
+  "uid": "12242042335e4bc8",
+  "compute_uid": "ee8c568e0ac44f2b",
+  "bench_uid": "39877bb63fb54725",
+  "date_time": "2024-02-20T15:15:53.984671",
+  "functional": true,
+  "reproduced": true,
+  "support_docker": true
+}
diff --git a/cm-mlops/cfg/benchmark-run-mlperf-inference-v3.1/run-0eeb9799b12b488f-output.json b/cm-mlops/cfg/benchmark-run-mlperf-inference-v3.1/run-0eeb9799b12b488f-output.json
@@ -0,0 +1,137 @@
+{
+  "return": 0,
+  "env": {},
+  "new_env": {},
+  "state": {
+    "app_mlperf_inference_log_summary": {
+      "sut name": "PySUT",
+      "scenario": "Offline",
+      "mode": "PerformanceOnly",
+      "samples per second": "12.4548",
+      "result is": "VALID",
+      "min duration satisfied": "Yes",
+      "min queries satisfied": "Yes",
+      "early stopping satisfied": "Yes",
+      "min latency (ns)": "64039368",
+      "max latency (ns)": "802905050",
+      "mean latency (ns)": "372956875",
+      "50.00 percentile latency (ns)": "378435867",
+      "90.00 percentile latency (ns)": "802905050",
+      "95.00 percentile latency (ns)": "802905050",
+      "97.00 percentile latency (ns)": "802905050",
+      "99.00 percentile latency (ns)": "802905050",
+      "99.90 percentile latency (ns)": "802905050",
+      "samples_per_query": "10",
+      "target_qps": "1",
+      "target_latency (ns)": "0",
+      "max_async_queries": "1",
+      "min_duration (ms)": "0",
+      "max_duration (ms)": "0",
+      "min_query_count": "1",
+      "max_query_count": "10",
+      "qsl_rng_seed": "13281865557512327830",
+      "sample_index_rng_seed": "198141574272810017",
+      "schedule_rng_seed": "7575108116881280410",
+      "accuracy_log_rng_seed": "0",
+      "accuracy_log_probability": "0",
+      "accuracy_log_sampling_target": "0",
+      "print_timestamps": "0",
+      "performance_issue_unique": "0",
+      "performance_issue_same": "0",
+      "performance_issue_same_index": "0",
+      "performance_sample_count": "10833"
+    },
+    "app_mlperf_inference_measurements": {
+      "starting_weights_filename": "https://github.com/mlcommons/inference_results_v2.1/raw/master/open/NeuralMagic/code/bert/deepsparse/models/oBERT-Large_95sparse_block4_qat.onnx.tar.xz",
+      "retraining": "no",
+      "input_data_types": "fp32",
+      "weight_data_types": "fp32",
+      "weight_transformations": "none"
+    },
+    "cm-mlperf-inference-results": {
+      "ip_172_31_89_56-reference-cpu-deepsparse-vdefault-default_config": {
+        "bert-99": {
+          "Offline": {
+            "performance": "12.455",
+            "performance_valid": true
+          }
+        }
+      }
+    },
+    "cm-mlperf-inference-results-last": {
+      "performance": "12.455",
+      "performance_valid": true
+    }
+  },
+  "new_state": {
+    "app_mlperf_inference_log_summary": {
+      "sut name": "PySUT",
+      "scenario": "Offline",
+      "mode": "PerformanceOnly",
+      "samples per second": "12.4548",
+      "result is": "VALID",
+      "min duration satisfied": "Yes",
+      "min queries satisfied": "Yes",
+      "early stopping satisfied": "Yes",
+      "min latency (ns)": "64039368",
+      "max latency (ns)": "802905050",
+      "mean latency (ns)": "372956875",
+      "50.00 percentile latency (ns)": "378435867",
+      "90.00 percentile latency (ns)": "802905050",
+      "95.00 percentile latency (ns)": "802905050",
+      "97.00 percentile latency (ns)": "802905050",
+      "99.00 percentile latency (ns)": "802905050",
+      "99.90 percentile latency (ns)": "802905050",
+      "samples_per_query": "10",
+      "target_qps": "1",
+      "target_latency (ns)": "0",
+      "max_async_queries": "1",
+      "min_duration (ms)": "0",
+      "max_duration (ms)": "0",
+      "min_query_count": "1",
+      "max_query_count": "10",
+      "qsl_rng_seed": "13281865557512327830",
+      "sample_index_rng_seed": "198141574272810017",
+      "schedule_rng_seed": "7575108116881280410",
+      "accuracy_log_rng_seed": "0",
+      "accuracy_log_probability": "0",
+      "accuracy_log_sampling_target": "0",
+      "print_timestamps": "0",
+      "performance_issue_unique": "0",
+      "performance_issue_same": "0",
+      "performance_issue_same_index": "0",
+      "performance_sample_count": "10833"
+    },
+    "app_mlperf_inference_measurements": {
+      "starting_weights_filename": "https://github.com/mlcommons/inference_results_v2.1/raw/master/open/NeuralMagic/code/bert/deepsparse/models/oBERT-Large_95sparse_block4_qat.onnx.tar.xz",
+      "retraining": "no",
+      "input_data_types": "fp32",
+      "weight_data_types": "fp32",
+      "weight_transformations": "none"
+    },
+    "cm-mlperf-inference-results": {
+      "ip_172_31_89_56-reference-cpu-deepsparse-vdefault-default_config": {
+        "bert-99": {
+          "Offline": {
+            "performance": "12.455",
+            "performance_valid": true
+          }
+        }
+      }
+    },
+    "cm-mlperf-inference-results-last": {
+      "performance": "12.455",
+      "performance_valid": true
+    }
+  },
+  "deps": [
+    "detect,os",
+    "detect,cpu",
+    "get,python3",
+    "get,mlcommons,inference,src",
+    "get,sut,description",
+    "get,mlperf,inference,results,dir",
+    "install,pip-package,for-cmind-python,_package.tabulate",
+    "get,mlperf,inference,utils"
+  ]
+}
diff --git a/cm-mlops/cfg/benchmark-run-mlperf-inference-v3.1/run-52c1d43172664ed0-input.json b/cm-mlops/cfg/benchmark-run-mlperf-inference-v3.1/run-52c1d43172664ed0-input.json
@@ -0,0 +1,55 @@
+{
+  "action": "run",
+  "automation": "script",
+  "tags": "run-mlperf-inference,_r4.0,_performance-only,_short",
+  "division": "open",
+  "category": "edge",
+  "device": "cpu",
+  "model": "bert-99.9",
+  "precision": "float32",
+  "implementation": "reference",
+  "backend": "onnxruntime",
+  "scenario": "Offline",
+  "execution_mode": "test",
+  "power": "no",
+  "adr": {
+    "python": {
+      "version_min": "3.8"
+    }
+  },
+  "clean": true,
+  "compliance": "no",
+  "j": true,
+  "jf": "run-52c1d43172664ed0",
+  "quiet": true,
+  "time": true,
+  "host_os": "linux",
+  "cmd": [
+    "--tags=run-mlperf-inference,_r4.0,_performance-only,_short",
+    "--division=open",
+    "--category=edge",
+    "--device=cpu",
+    "--model=bert-99.9",
+    "--precision=float32",
+    "--implementation=reference",
+    "--backend=onnxruntime",
+    "--scenario=Offline",
+    "--execution_mode=test",
+    "--power=no",
+    "--adr.python.version_min=3.8",
+    "--clean",
+    "--compliance=no",
+    "--j",
+    "--jf=run-52c1d43172664ed0",
+    "--quiet",
+    "--time",
+    "--host_os=linux"
+  ],
+  "out": "con",
+  "parsed_automation": [
+    [
+      "script",
+      "5b4e0237da074764"
+    ]
+  ]
+}
diff --git a/cm-mlops/cfg/benchmark-run-mlperf-inference-v3.1/run-52c1d43172664ed0-meta.json b/cm-mlops/cfg/benchmark-run-mlperf-inference-v3.1/run-52c1d43172664ed0-meta.json
@@ -0,0 +1,9 @@
+{
+  "uid": "52c1d43172664ed0",
+  "compute_uid": "ee8c568e0ac44f2b",
+  "bench_uid": "39877bb63fb54725",
+  "date_time": "2024-02-20T15:04:13.424211",
+  "functional": true,
+  "reproduced": true,
+  "support_docker": true
+}