Skip to content

Commit

Permalink
various improvements to support MLPerf inference v4.0 (#1116)
Browse files Browse the repository at this point in the history
  • Loading branch information
arjunsuresh authored Feb 20, 2024
2 parents 144c512 + 3d08c07 commit c922612
Show file tree
Hide file tree
Showing 74 changed files with 1,776 additions and 114 deletions.
2 changes: 1 addition & 1 deletion cm-mlops/automation/script/module.py
Original file line number Diff line number Diff line change
Expand Up @@ -4161,7 +4161,7 @@ def prepare_and_run_script_with_postprocessing(i, postprocess="postprocess"):
please report this issue with a full log at "https://github.com/mlcommons/ck".
The CM concept is to collaboratively fix such issues inside portable CM scripts
to make existing tools and native scripts more portable, interoperable
and deterministic. Thank you!'''
and deterministic. Thank you'''

return {'return':2, 'error':'Portable CM script failed (name = {}, return code = {})\n\n{}'.format(meta['alias'], rc, note)}

Expand Down
2 changes: 1 addition & 1 deletion cm-mlops/cfg/benchmark-list/mlperf-inference.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ urls:
url: "https://github.com/mlcommons/inference"
- name: "ArXiv paper"
url: "https://arxiv.org/abs/1911.02549"
- name: "MLCommons CM automation (under development)"
- name: "MLCommons CM automation for MLPerf inference"
url: "https://github.com/mlcommons/ck/tree/master/docs/mlperf/inference"

script_name: run-mlperf-inference-app,4a5d5b13fd7e4ac8
Expand Down
7 changes: 7 additions & 0 deletions cm-mlops/cfg/benchmark-run-mlperf-inference-v3.1/_cm.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,18 @@ view_dimensions:
- "MLPerf backend"
- - input.model
- "MLPerf model"
- - input.precision
- "Model precision"
- - input.scenario
- "MLPerf scenario"
- - input.host_os
- "Host OS"
- - output.state.cm-mlperf-inference-results-last.performance
- "Got performance"
- "tick"
- - output.state.cm-mlperf-inference-results-last.accuracy
- "Got accuracy"
- "tick"
- - output.state.cm-mlperf-inference-results-last.power
- "Got energy"
- "tick"
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
{
"action": "run",
"automation": "script",
"tags": "run-mlperf-inference,_r4.0,_performance-only,_short",
"division": "open",
"category": "datacenter",
"device": "qaic",
"model": "bert-99.9",
"precision": "float16",
"implementation": "qualcomm",
"backend": "glow",
"scenario": "Offline",
"execution_mode": "test",
"power": "no",
"adr": {
"python": {
"version_min": "3.8"
}
},
"clean": true,
"compliance": "no",
"j": true,
"jf": "run-0eeb9799b12b488f",
"quiet": true,
"time": true,
"host_os": "linux",
"cmd": [
"--tags=run-mlperf-inference,_r4.0,_performance-only,_short",
"--division=open",
"--category=datacenter",
"--device=qaic",
"--model=bert-99.9",
"--precision=float16",
"--implementation=qualcomm",
"--backend=glow",
"--scenario=Offline",
"--execution_mode=test",
"--power=no",
"--adr.python.version_min=3.8",
"--clean",
"--compliance=no",
"--j",
"--quiet",
"--time",
"--host_os=linux"
],
"out": "con",
"parsed_automation": [
[
"script",
"5b4e0237da074764"
]
]
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
{
"uid": "800fe1b33ca443da",
"compute_uid": "d2ae645066664463",
"bench_uid": "39877bb63fb54725",
"date_time": "2024-02-20T15:25:03.786139",
"functional": true,
"reproduced": true,
"support_docker": true
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
{
"return": 0,
"env": {},
"new_env": {},
"state": {
"cm-mlperf-inference-results-last": {
"performance": "tested-will-be-added-in-v4.0",
"performance_valid": true
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
{
"action": "run",
"automation": "script",
"tags": "run-mlperf-inference,_r4.0,_performance-only,_short",
"division": "open",
"category": "edge",
"device": "cpu",
"model": "bert-99",
"precision": "int8",
"implementation": "reference",
"backend": "deepsparse",
"scenario": "Offline",
"execution_mode": "test",
"power": "no",
"adr": {
"python": {
"version_min": "3.8"
}
},
"clean": true,
"compliance": "no",
"j": true,
"jf": "run-0eeb9799b12b488f",
"quiet": true,
"time": true,
"host_os": "linux",
"cmd": [
"--tags=run-mlperf-inference,_r4.0,_performance-only,_short",
"--division=open",
"--category=edge",
"--device=cpu",
"--model=bert-99",
"--precision=int8",
"--implementation=reference",
"--backend=deepsparse",
"--scenario=Offline",
"--execution_mode=test",
"--power=no",
"--adr.python.version_min=3.8",
"--clean",
"--compliance=no",
"--j",
"--jf=run-0eeb9799b12b488f",
"--quiet",
"--time",
"--host_os=linux"
],
"out": "con",
"parsed_automation": [
[
"script",
"5b4e0237da074764"
]
]
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
{
"uid": "12242042335e4bc8",
"compute_uid": "ee8c568e0ac44f2b",
"bench_uid": "39877bb63fb54725",
"date_time": "2024-02-20T15:15:53.984671",
"functional": true,
"reproduced": true,
"support_docker": true
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
{
"return": 0,
"env": {},
"new_env": {},
"state": {
"app_mlperf_inference_log_summary": {
"sut name": "PySUT",
"scenario": "Offline",
"mode": "PerformanceOnly",
"samples per second": "12.4548",
"result is": "VALID",
"min duration satisfied": "Yes",
"min queries satisfied": "Yes",
"early stopping satisfied": "Yes",
"min latency (ns)": "64039368",
"max latency (ns)": "802905050",
"mean latency (ns)": "372956875",
"50.00 percentile latency (ns)": "378435867",
"90.00 percentile latency (ns)": "802905050",
"95.00 percentile latency (ns)": "802905050",
"97.00 percentile latency (ns)": "802905050",
"99.00 percentile latency (ns)": "802905050",
"99.90 percentile latency (ns)": "802905050",
"samples_per_query": "10",
"target_qps": "1",
"target_latency (ns)": "0",
"max_async_queries": "1",
"min_duration (ms)": "0",
"max_duration (ms)": "0",
"min_query_count": "1",
"max_query_count": "10",
"qsl_rng_seed": "13281865557512327830",
"sample_index_rng_seed": "198141574272810017",
"schedule_rng_seed": "7575108116881280410",
"accuracy_log_rng_seed": "0",
"accuracy_log_probability": "0",
"accuracy_log_sampling_target": "0",
"print_timestamps": "0",
"performance_issue_unique": "0",
"performance_issue_same": "0",
"performance_issue_same_index": "0",
"performance_sample_count": "10833"
},
"app_mlperf_inference_measurements": {
"starting_weights_filename": "https://github.com/mlcommons/inference_results_v2.1/raw/master/open/NeuralMagic/code/bert/deepsparse/models/oBERT-Large_95sparse_block4_qat.onnx.tar.xz",
"retraining": "no",
"input_data_types": "fp32",
"weight_data_types": "fp32",
"weight_transformations": "none"
},
"cm-mlperf-inference-results": {
"ip_172_31_89_56-reference-cpu-deepsparse-vdefault-default_config": {
"bert-99": {
"Offline": {
"performance": "12.455",
"performance_valid": true
}
}
}
},
"cm-mlperf-inference-results-last": {
"performance": "12.455",
"performance_valid": true
}
},
"new_state": {
"app_mlperf_inference_log_summary": {
"sut name": "PySUT",
"scenario": "Offline",
"mode": "PerformanceOnly",
"samples per second": "12.4548",
"result is": "VALID",
"min duration satisfied": "Yes",
"min queries satisfied": "Yes",
"early stopping satisfied": "Yes",
"min latency (ns)": "64039368",
"max latency (ns)": "802905050",
"mean latency (ns)": "372956875",
"50.00 percentile latency (ns)": "378435867",
"90.00 percentile latency (ns)": "802905050",
"95.00 percentile latency (ns)": "802905050",
"97.00 percentile latency (ns)": "802905050",
"99.00 percentile latency (ns)": "802905050",
"99.90 percentile latency (ns)": "802905050",
"samples_per_query": "10",
"target_qps": "1",
"target_latency (ns)": "0",
"max_async_queries": "1",
"min_duration (ms)": "0",
"max_duration (ms)": "0",
"min_query_count": "1",
"max_query_count": "10",
"qsl_rng_seed": "13281865557512327830",
"sample_index_rng_seed": "198141574272810017",
"schedule_rng_seed": "7575108116881280410",
"accuracy_log_rng_seed": "0",
"accuracy_log_probability": "0",
"accuracy_log_sampling_target": "0",
"print_timestamps": "0",
"performance_issue_unique": "0",
"performance_issue_same": "0",
"performance_issue_same_index": "0",
"performance_sample_count": "10833"
},
"app_mlperf_inference_measurements": {
"starting_weights_filename": "https://github.com/mlcommons/inference_results_v2.1/raw/master/open/NeuralMagic/code/bert/deepsparse/models/oBERT-Large_95sparse_block4_qat.onnx.tar.xz",
"retraining": "no",
"input_data_types": "fp32",
"weight_data_types": "fp32",
"weight_transformations": "none"
},
"cm-mlperf-inference-results": {
"ip_172_31_89_56-reference-cpu-deepsparse-vdefault-default_config": {
"bert-99": {
"Offline": {
"performance": "12.455",
"performance_valid": true
}
}
}
},
"cm-mlperf-inference-results-last": {
"performance": "12.455",
"performance_valid": true
}
},
"deps": [
"detect,os",
"detect,cpu",
"get,python3",
"get,mlcommons,inference,src",
"get,sut,description",
"get,mlperf,inference,results,dir",
"install,pip-package,for-cmind-python,_package.tabulate",
"get,mlperf,inference,utils"
]
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
{
"action": "run",
"automation": "script",
"tags": "run-mlperf-inference,_r4.0,_performance-only,_short",
"division": "open",
"category": "edge",
"device": "cpu",
"model": "bert-99.9",
"precision": "float32",
"implementation": "reference",
"backend": "onnxruntime",
"scenario": "Offline",
"execution_mode": "test",
"power": "no",
"adr": {
"python": {
"version_min": "3.8"
}
},
"clean": true,
"compliance": "no",
"j": true,
"jf": "run-52c1d43172664ed0",
"quiet": true,
"time": true,
"host_os": "linux",
"cmd": [
"--tags=run-mlperf-inference,_r4.0,_performance-only,_short",
"--division=open",
"--category=edge",
"--device=cpu",
"--model=bert-99.9",
"--precision=float32",
"--implementation=reference",
"--backend=onnxruntime",
"--scenario=Offline",
"--execution_mode=test",
"--power=no",
"--adr.python.version_min=3.8",
"--clean",
"--compliance=no",
"--j",
"--jf=run-52c1d43172664ed0",
"--quiet",
"--time",
"--host_os=linux"
],
"out": "con",
"parsed_automation": [
[
"script",
"5b4e0237da074764"
]
]
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
{
"uid": "52c1d43172664ed0",
"compute_uid": "ee8c568e0ac44f2b",
"bench_uid": "39877bb63fb54725",
"date_time": "2024-02-20T15:04:13.424211",
"functional": true,
"reproduced": true,
"support_docker": true
}
Loading

0 comments on commit c922612

Please sign in to comment.