Merge branch 'master' of https://github.com/ctuning/mlcommons-ck
gfursin committed Feb 14, 2024
2 parents 30b0d47 + 578d483 commit 916b40d
Showing 5 changed files with 41 additions and 13 deletions.
18 changes: 16 additions & 2 deletions cm-mlops/script/app-loadgen-generic-python/README-extra.md
@@ -198,14 +198,28 @@ You can also specify any custom onnx model file as follows:
cm run script "python app loadgen-generic _onnxruntime" --modelpath=<CUSTOM_MODEL_FILE_PATH>
```

### Benchmark Hugging Face model

```bash
-cm run script "python app loadgen-generic _onnxruntime _custom _huggingface _model-stub.alpindale/Llama-2-13b-ONNX" --adr.hf-downloader.model_filename=FP32/LlamaV2_13B_float32.onnx
+cmr "python app loadgen-generic _onnxruntime _custom _huggingface _model-stub.ctuning/mlperf-inference-bert-onnx-fp32-squad-v1.1" --adr.hf-downloader.model_filename=model.onnx
```

*See more examples to download Hugging Face models via CM [here](../get-ml-model-huggingface-zoo/README-extra.md).*

+### Benchmark using ONNX CUDA
+
+```bash
+cm rm cache -f
+cmr "python app loadgen-generic _onnxruntime _cuda _retinanet" --quiet
+cmr "python app loadgen-generic _onnxruntime _cuda _custom _huggingface _model-stub.ctuning/mlperf-inference-bert-onnx-fp32-squad-v1.1" --adr.hf-downloader.model_filename=model.onnx
+```
+
+TBD: some cases that are not yet fully supported:
+```bash
+cmr "python app loadgen-generic _onnxruntime _cuda _custom _huggingface _model-stub.microsoft/Mistral-7B-v0.1-onnx" --adr.hf-downloader.model_filename=Mistral-7B-v0.1.onnx,Mistral-7B-v0.1.onnx.data
+cmr "python app loadgen-generic _onnxruntime _cuda _custom _huggingface _model-stub.alpindale/Llama-2-13b-ONNX" --adr.hf-downloader.model_filename=FP32/LlamaV2_13B_float32.onnx --adr.hf-downloader.full_subfolder=FP32
+cmr "python app loadgen-generic _onnxruntime _cuda _custom _huggingface _model-stub.Intel/gpt-j-6B-int8-static" --adr.hf-downloader.model_filename=model.onnx --adr.hf-downloader.full_subfolder=.
+```

### Other variations and flags:

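For background on the "not yet fully supported" cases above: models such as Mistral-7B ship their weights in a separate external-data file (`*.onnx.data`) alongside the `.onnx` graph, which ONNX Runtime resolves by relative path at load time. A minimal standalone sketch of loading such a model with the CUDA provider — the file name is illustrative, and both files are assumed to be in the current directory:

```python
import onnxruntime as ort

# Hypothetical file names: Mistral-7B-v0.1.onnx and its external-data
# companion Mistral-7B-v0.1.onnx.data must sit in the same directory;
# ONNX Runtime finds the .data file via relative paths stored in the graph.
session = ort.InferenceSession(
    "Mistral-7B-v0.1.onnx",
    providers=["CUDAExecutionProvider", "CPUExecutionProvider"],
)

# Inspect the expected inputs before constructing a feed.
for inp in session.get_inputs():
    print(inp.name, inp.shape, inp.type)
```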
13 changes: 13 additions & 0 deletions cm-mlops/script/app-mlperf-inference/customize.py
@@ -204,6 +204,14 @@ def postprocess(i):
        shutil.copy(env['CM_MLPERF_USER_CONF'], 'user.conf')

        result = mlperf_utils.get_result_from_log(env['CM_MLPERF_LAST_RELEASE'], model, scenario, output_dir, mode)
+        power = None
+        power_efficiency = None
+        if mode == "performance":
+            result_split = result.split(",")
+            if len(result_split) > 2:  # power results are there
+                power = result_split[1]
+                power_efficiency = result_split[2]
+
        if not state.get('CM_MLPERF_RESULTS'):
            state['CM_MLPERF_RESULTS'] = {}
        if not state['CM_MLPERF_RESULTS'].get(state['CM_SUT_CONFIG_NAME']):
@@ -213,6 +221,11 @@ def postprocess(i):
        if not state['CM_MLPERF_RESULTS'][state['CM_SUT_CONFIG_NAME']][model].get(scenario):
            state['CM_MLPERF_RESULTS'][state['CM_SUT_CONFIG_NAME']][model][scenario] = {}
        state['CM_MLPERF_RESULTS'][state['CM_SUT_CONFIG_NAME']][model][scenario][mode] = result
+        if power:
+            state['CM_MLPERF_RESULTS'][state['CM_SUT_CONFIG_NAME']][model][scenario]['power'] = power
+        if power_efficiency:
+            state['CM_MLPERF_RESULTS'][state['CM_SUT_CONFIG_NAME']][model][scenario]['power_efficiency'] = power_efficiency
+

        # Record basic host info
        host_info = {
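To make the new power parsing concrete: in performance mode with power logs, `mlperf_utils.get_result_from_log` now returns a comma-separated string of the form `<performance>,<power>,<power_efficiency>` (see the mlperf_utils.py change below). A minimal sketch of the extraction, with made-up values:

```python
# Hypothetical performance-mode result string with power measurements:
# "<performance metric>,<power metric>,<power efficiency>"
result = "12345.678,56.2,218.4"

power = None
power_efficiency = None
result_split = result.split(",")
if len(result_split) > 2:  # power results are present
    power = result_split[1]             # "56.2"
    power_efficiency = result_split[2]  # "218.4"

print(power, power_efficiency)
```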
4 changes: 0 additions & 4 deletions cm-mlops/script/get-ml-model-huggingface-zoo/README-extra.md
@@ -1,9 +1,5 @@
 # Examples
 
-```bash
-cmr "get ml-model huggingface zoo _model-stub.alpindale/Llama-2-13b-ONNX" --model_filename=FP32/LlamaV2_13B_float32.onnx
-```
-
 ```bash
 cmr "get ml-model huggingface zoo _model-stub.alpindale/Llama-2-13b-ONNX" --model_filename=FP32/LlamaV2_13B_float32.onnx --full_subfolder=FP32
 ```
18 changes: 11 additions & 7 deletions cm-mlops/script/get-mlperf-inference-utils/mlperf_utils.py
@@ -18,10 +18,12 @@ def get_result_from_log(version, model, scenario, result_path, mode):
    result = ''
    if mode == "performance":
        has_power = os.path.exists(os.path.join(result_path, "power"))
-        result = str(checker.get_performance_metric(config, mlperf_model, result_path, scenario, None, None, has_power))
+        result_ = checker.get_performance_metric(config, mlperf_model, result_path, scenario, None, None, has_power)
+        result = str(round(result_, 3))
        if has_power:
-            is_valid, power_metric, scenario, avg_power_efficiency = checker.get_power_metric(config, scenario, result_path, True, result)
-            result += f",{power_metric},{avg_power_efficiency*1000} "
+            is_valid, power_metric, scenario, avg_power_efficiency = checker.get_power_metric(config, scenario, result_path, True, result_)
+            result += f",{power_metric},{avg_power_efficiency}"
+

    elif mode == "accuracy" and os.path.exists(os.path.join(result_path, 'accuracy.txt')):

@@ -32,9 +34,10 @@ def get_result_from_log(version, model, scenario, result_path, mode):
            result = str(round(float(acc_results[acc]), 5))
        else:
            result = '('
+            result_list = []
            for i, acc in enumerate(acc_results):
-                result += str(round(float(acc_results[acc]), 5))
-            result += ")"
+                result_list.append(str(round(float(acc_results[acc]), 5)))
+            result += ", ".join(result_list) + ")"

    return result

@@ -129,7 +132,7 @@ def get_result_string(version, model, scenario, result_path, has_power, sub_res)
        performance_result_ = performance_result / 1000000  # convert to milliseconds
    else:
        performance_result_ = performance_result
-    result['performance'] = performance_result_
+    result['performance'] = round(performance_result_, 3)

    if scenario != effective_scenario:
        inferred, inferred_result = checker.get_inferred_result(scenario, effective_scenario, performance_result, mlperf_log, config, False)
@@ -170,7 +173,7 @@ def get_result_string(version, model, scenario, result_path, has_power, sub_res)
    if len(accuracy_results) == 1:
        accuracy_result = accuracy_results[0]
    else:
-        accuracy_result = "(" + ",".join(accuracy_results) + ")"
+        accuracy_result = "(" + ", ".join(accuracy_results) + ")"
    result['accuracy'] = accuracy_result

    result_string = f"\n\n## Results\n"
@@ -217,4 +220,5 @@ def get_result_table(results):
            if results[model][scenario].get('power_efficiency', '') != '':
                row.append(results[model][scenario]['power_efficiency'])
            table.append(row)
+
    return table, headers
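As a standalone illustration of the accuracy-formatting fix above — multiple accuracy metrics are now joined with `", "` instead of concatenated back to back — with made-up values:

```python
# Hypothetical multi-metric accuracy results
acc_results = {"acc": 90.1234567, "f1": 85.9876543}

result = '('
result_list = []
for acc in acc_results:
    result_list.append(str(round(float(acc_results[acc]), 5)))
result += ", ".join(result_list) + ")"

print(result)  # (90.12346, 85.98765)
```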
1 change: 1 addition & 0 deletions cm-mlops/script/gui/playground_scripts.py
@@ -129,6 +129,7 @@ def page(st, params):
 A few other popular commands:
 ```bash
 cmr "{}" --help
+cmr "{}" --shell
 cm run script "{}" --shell
 cm docker script "{}"