-
Notifications
You must be signed in to change notification settings - Fork 120
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
various improvements to support MLPerf inference v4.0 (#1116)
- Loading branch information
Showing
74 changed files
with
1,776 additions
and
114 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
54 changes: 54 additions & 0 deletions
54
cm-mlops/cfg/benchmark-run-mlperf-inference-v3.1/run-005147815bf840b8-input.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
{ | ||
"action": "run", | ||
"automation": "script", | ||
"tags": "run-mlperf-inference,_r4.0,_performance-only,_short", | ||
"division": "open", | ||
"category": "datacenter", | ||
"device": "qaic", | ||
"model": "bert-99.9", | ||
"precision": "float16", | ||
"implementation": "qualcomm", | ||
"backend": "glow", | ||
"scenario": "Offline", | ||
"execution_mode": "test", | ||
"power": "no", | ||
"adr": { | ||
"python": { | ||
"version_min": "3.8" | ||
} | ||
}, | ||
"clean": true, | ||
"compliance": "no", | ||
"j": true, | ||
"jf": "run-0eeb9799b12b488f", | ||
"quiet": true, | ||
"time": true, | ||
"host_os": "linux", | ||
"cmd": [ | ||
"--tags=run-mlperf-inference,_r4.0,_performance-only,_short", | ||
"--division=open", | ||
"--category=datacenter", | ||
"--device=qaic", | ||
"--model=bert-99.9", | ||
"--precision=float16", | ||
"--implementation=qualcomm", | ||
"--backend=glow", | ||
"--scenario=Offline", | ||
"--execution_mode=test", | ||
"--power=no", | ||
"--adr.python.version_min=3.8", | ||
"--clean", | ||
"--compliance=no", | ||
"--j", | ||
"--quiet", | ||
"--time", | ||
"--host_os=linux" | ||
], | ||
"out": "con", | ||
"parsed_automation": [ | ||
[ | ||
"script", | ||
"5b4e0237da074764" | ||
] | ||
] | ||
} |
9 changes: 9 additions & 0 deletions
9
cm-mlops/cfg/benchmark-run-mlperf-inference-v3.1/run-005147815bf840b8-meta.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
{ | ||
"uid": "800fe1b33ca443da", | ||
"compute_uid": "d2ae645066664463", | ||
"bench_uid": "39877bb63fb54725", | ||
"date_time": "2024-02-20T15:25:03.786139", | ||
"functional": true, | ||
"reproduced": true, | ||
"support_docker": true | ||
} |
11 changes: 11 additions & 0 deletions
11
cm-mlops/cfg/benchmark-run-mlperf-inference-v3.1/run-005147815bf840b8-output.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
{ | ||
"return": 0, | ||
"env": {}, | ||
"new_env": {}, | ||
"state": { | ||
"cm-mlperf-inference-results-last": { | ||
"performance": "tested-will-be-added-in-v4.0", | ||
"performance_valid": true | ||
} | ||
} | ||
} |
55 changes: 55 additions & 0 deletions
55
cm-mlops/cfg/benchmark-run-mlperf-inference-v3.1/run-0eeb9799b12b488f-input.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
{ | ||
"action": "run", | ||
"automation": "script", | ||
"tags": "run-mlperf-inference,_r4.0,_performance-only,_short", | ||
"division": "open", | ||
"category": "edge", | ||
"device": "cpu", | ||
"model": "bert-99", | ||
"precision": "int8", | ||
"implementation": "reference", | ||
"backend": "deepsparse", | ||
"scenario": "Offline", | ||
"execution_mode": "test", | ||
"power": "no", | ||
"adr": { | ||
"python": { | ||
"version_min": "3.8" | ||
} | ||
}, | ||
"clean": true, | ||
"compliance": "no", | ||
"j": true, | ||
"jf": "run-0eeb9799b12b488f", | ||
"quiet": true, | ||
"time": true, | ||
"host_os": "linux", | ||
"cmd": [ | ||
"--tags=run-mlperf-inference,_r4.0,_performance-only,_short", | ||
"--division=open", | ||
"--category=edge", | ||
"--device=cpu", | ||
"--model=bert-99", | ||
"--precision=int8", | ||
"--implementation=reference", | ||
"--backend=deepsparse", | ||
"--scenario=Offline", | ||
"--execution_mode=test", | ||
"--power=no", | ||
"--adr.python.version_min=3.8", | ||
"--clean", | ||
"--compliance=no", | ||
"--j", | ||
"--jf=run-0eeb9799b12b488f", | ||
"--quiet", | ||
"--time", | ||
"--host_os=linux" | ||
], | ||
"out": "con", | ||
"parsed_automation": [ | ||
[ | ||
"script", | ||
"5b4e0237da074764" | ||
] | ||
] | ||
} |
9 changes: 9 additions & 0 deletions
9
cm-mlops/cfg/benchmark-run-mlperf-inference-v3.1/run-0eeb9799b12b488f-meta.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
{ | ||
"uid": "12242042335e4bc8", | ||
"compute_uid": "ee8c568e0ac44f2b", | ||
"bench_uid": "39877bb63fb54725", | ||
"date_time": "2024-02-20T15:15:53.984671", | ||
"functional": true, | ||
"reproduced": true, | ||
"support_docker": true | ||
} |
137 changes: 137 additions & 0 deletions
137
cm-mlops/cfg/benchmark-run-mlperf-inference-v3.1/run-0eeb9799b12b488f-output.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,137 @@ | ||
{ | ||
"return": 0, | ||
"env": {}, | ||
"new_env": {}, | ||
"state": { | ||
"app_mlperf_inference_log_summary": { | ||
"sut name": "PySUT", | ||
"scenario": "Offline", | ||
"mode": "PerformanceOnly", | ||
"samples per second": "12.4548", | ||
"result is": "VALID", | ||
"min duration satisfied": "Yes", | ||
"min queries satisfied": "Yes", | ||
"early stopping satisfied": "Yes", | ||
"min latency (ns)": "64039368", | ||
"max latency (ns)": "802905050", | ||
"mean latency (ns)": "372956875", | ||
"50.00 percentile latency (ns)": "378435867", | ||
"90.00 percentile latency (ns)": "802905050", | ||
"95.00 percentile latency (ns)": "802905050", | ||
"97.00 percentile latency (ns)": "802905050", | ||
"99.00 percentile latency (ns)": "802905050", | ||
"99.90 percentile latency (ns)": "802905050", | ||
"samples_per_query": "10", | ||
"target_qps": "1", | ||
"target_latency (ns)": "0", | ||
"max_async_queries": "1", | ||
"min_duration (ms)": "0", | ||
"max_duration (ms)": "0", | ||
"min_query_count": "1", | ||
"max_query_count": "10", | ||
"qsl_rng_seed": "13281865557512327830", | ||
"sample_index_rng_seed": "198141574272810017", | ||
"schedule_rng_seed": "7575108116881280410", | ||
"accuracy_log_rng_seed": "0", | ||
"accuracy_log_probability": "0", | ||
"accuracy_log_sampling_target": "0", | ||
"print_timestamps": "0", | ||
"performance_issue_unique": "0", | ||
"performance_issue_same": "0", | ||
"performance_issue_same_index": "0", | ||
"performance_sample_count": "10833" | ||
}, | ||
"app_mlperf_inference_measurements": { | ||
"starting_weights_filename": "https://github.com/mlcommons/inference_results_v2.1/raw/master/open/NeuralMagic/code/bert/deepsparse/models/oBERT-Large_95sparse_block4_qat.onnx.tar.xz", | ||
"retraining": "no", | ||
"input_data_types": "fp32", | ||
"weight_data_types": "fp32", | ||
"weight_transformations": "none" | ||
}, | ||
"cm-mlperf-inference-results": { | ||
"ip_172_31_89_56-reference-cpu-deepsparse-vdefault-default_config": { | ||
"bert-99": { | ||
"Offline": { | ||
"performance": "12.455", | ||
"performance_valid": true | ||
} | ||
} | ||
} | ||
}, | ||
"cm-mlperf-inference-results-last": { | ||
"performance": "12.455", | ||
"performance_valid": true | ||
} | ||
}, | ||
"new_state": { | ||
"app_mlperf_inference_log_summary": { | ||
"sut name": "PySUT", | ||
"scenario": "Offline", | ||
"mode": "PerformanceOnly", | ||
"samples per second": "12.4548", | ||
"result is": "VALID", | ||
"min duration satisfied": "Yes", | ||
"min queries satisfied": "Yes", | ||
"early stopping satisfied": "Yes", | ||
"min latency (ns)": "64039368", | ||
"max latency (ns)": "802905050", | ||
"mean latency (ns)": "372956875", | ||
"50.00 percentile latency (ns)": "378435867", | ||
"90.00 percentile latency (ns)": "802905050", | ||
"95.00 percentile latency (ns)": "802905050", | ||
"97.00 percentile latency (ns)": "802905050", | ||
"99.00 percentile latency (ns)": "802905050", | ||
"99.90 percentile latency (ns)": "802905050", | ||
"samples_per_query": "10", | ||
"target_qps": "1", | ||
"target_latency (ns)": "0", | ||
"max_async_queries": "1", | ||
"min_duration (ms)": "0", | ||
"max_duration (ms)": "0", | ||
"min_query_count": "1", | ||
"max_query_count": "10", | ||
"qsl_rng_seed": "13281865557512327830", | ||
"sample_index_rng_seed": "198141574272810017", | ||
"schedule_rng_seed": "7575108116881280410", | ||
"accuracy_log_rng_seed": "0", | ||
"accuracy_log_probability": "0", | ||
"accuracy_log_sampling_target": "0", | ||
"print_timestamps": "0", | ||
"performance_issue_unique": "0", | ||
"performance_issue_same": "0", | ||
"performance_issue_same_index": "0", | ||
"performance_sample_count": "10833" | ||
}, | ||
"app_mlperf_inference_measurements": { | ||
"starting_weights_filename": "https://github.com/mlcommons/inference_results_v2.1/raw/master/open/NeuralMagic/code/bert/deepsparse/models/oBERT-Large_95sparse_block4_qat.onnx.tar.xz", | ||
"retraining": "no", | ||
"input_data_types": "fp32", | ||
"weight_data_types": "fp32", | ||
"weight_transformations": "none" | ||
}, | ||
"cm-mlperf-inference-results": { | ||
"ip_172_31_89_56-reference-cpu-deepsparse-vdefault-default_config": { | ||
"bert-99": { | ||
"Offline": { | ||
"performance": "12.455", | ||
"performance_valid": true | ||
} | ||
} | ||
} | ||
}, | ||
"cm-mlperf-inference-results-last": { | ||
"performance": "12.455", | ||
"performance_valid": true | ||
} | ||
}, | ||
"deps": [ | ||
"detect,os", | ||
"detect,cpu", | ||
"get,python3", | ||
"get,mlcommons,inference,src", | ||
"get,sut,description", | ||
"get,mlperf,inference,results,dir", | ||
"install,pip-package,for-cmind-python,_package.tabulate", | ||
"get,mlperf,inference,utils" | ||
] | ||
} |
55 changes: 55 additions & 0 deletions
55
cm-mlops/cfg/benchmark-run-mlperf-inference-v3.1/run-52c1d43172664ed0-input.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
{ | ||
"action": "run", | ||
"automation": "script", | ||
"tags": "run-mlperf-inference,_r4.0,_performance-only,_short", | ||
"division": "open", | ||
"category": "edge", | ||
"device": "cpu", | ||
"model": "bert-99.9", | ||
"precision": "float32", | ||
"implementation": "reference", | ||
"backend": "onnxruntime", | ||
"scenario": "Offline", | ||
"execution_mode": "test", | ||
"power": "no", | ||
"adr": { | ||
"python": { | ||
"version_min": "3.8" | ||
} | ||
}, | ||
"clean": true, | ||
"compliance": "no", | ||
"j": true, | ||
"jf": "run-52c1d43172664ed0", | ||
"quiet": true, | ||
"time": true, | ||
"host_os": "linux", | ||
"cmd": [ | ||
"--tags=run-mlperf-inference,_r4.0,_performance-only,_short", | ||
"--division=open", | ||
"--category=edge", | ||
"--device=cpu", | ||
"--model=bert-99.9", | ||
"--precision=float32", | ||
"--implementation=reference", | ||
"--backend=onnxruntime", | ||
"--scenario=Offline", | ||
"--execution_mode=test", | ||
"--power=no", | ||
"--adr.python.version_min=3.8", | ||
"--clean", | ||
"--compliance=no", | ||
"--j", | ||
"--jf=run-52c1d43172664ed0", | ||
"--quiet", | ||
"--time", | ||
"--host_os=linux" | ||
], | ||
"out": "con", | ||
"parsed_automation": [ | ||
[ | ||
"script", | ||
"5b4e0237da074764" | ||
] | ||
] | ||
} |
9 changes: 9 additions & 0 deletions
9
cm-mlops/cfg/benchmark-run-mlperf-inference-v3.1/run-52c1d43172664ed0-meta.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
{ | ||
"uid": "52c1d43172664ed0", | ||
"compute_uid": "ee8c568e0ac44f2b", | ||
"bench_uid": "39877bb63fb54725", | ||
"date_time": "2024-02-20T15:04:13.424211", | ||
"functional": true, | ||
"reproduced": true, | ||
"support_docker": true | ||
} |
Oops, something went wrong.