diff --git a/cm-mlops/script/app-loadgen-generic-python/README-extra.md b/cm-mlops/script/app-loadgen-generic-python/README-extra.md index ec63c851f5..240c85db94 100644 --- a/cm-mlops/script/app-loadgen-generic-python/README-extra.md +++ b/cm-mlops/script/app-loadgen-generic-python/README-extra.md @@ -198,14 +198,28 @@ You can also specify any custom onnx model file as follows: cm run script "python app loadgen-generic _onnxruntime" --modelpath= ``` - ### Benchmark Hugging Face model ```bash -cm run script "python app loadgen-generic _onnxruntime _custom _huggingface _model-stub.alpindale/Llama-2-13b-ONNX" --adr.hf-downloader.model_filename=FP32/LlamaV2_13B_float32.onnx +cmr "python app loadgen-generic _onnxruntime _custom _huggingface _model-stub.ctuning/mlperf-inference-bert-onnx-fp32-squad-v1.1" --adr.hf-downloader.model_filename=model.onnx ``` +*See more examples to download Hugging Face models via CM [here](../get-ml-model-huggingface-zoo/README-extra.md).* + +### Benchmark using ONNX CUDA + +```bash +cm rm cache -f +cmr "python app loadgen-generic _onnxruntime _cuda _retinanet" --quiet +cmr "python app loadgen-generic _onnxruntime _cuda _custom _huggingface _model-stub.ctuning/mlperf-inference-bert-onnx-fp32-squad-v1.1" --adr.hf-downloader.model_filename=model.onnx +``` +TBD: some cases that are not yet fully supported: +```bash +cmr "python app loadgen-generic _onnxruntime _cuda _custom _huggingface _model-stub.microsoft/Mistral-7B-v0.1-onnx" --adr.hf-downloader.model_filename=Mistral-7B-v0.1.onnx,Mistral-7B-v0.1.onnx.data +cmr "python app loadgen-generic _onnxruntime _cuda _custom _huggingface _model-stub.alpindale/Llama-2-13b-ONNX" --adr.hf-downloader.model_filename=FP32/LlamaV2_13B_float32.onnx --adr.hf-downloader.full_subfolder=FP32 +cmr "python app loadgen-generic _onnxruntime _cuda _custom _huggingface _model-stub.Intel/gpt-j-6B-int8-static" --adr.hf-downloader.model_filename=model.onnx --adr.hf-downloader.full_subfolder=. 
+``` ### Other variations and flags: diff --git a/cm-mlops/script/app-mlperf-inference/customize.py b/cm-mlops/script/app-mlperf-inference/customize.py index e8fec8a6fb..7f14655f40 100644 --- a/cm-mlops/script/app-mlperf-inference/customize.py +++ b/cm-mlops/script/app-mlperf-inference/customize.py @@ -204,6 +204,14 @@ def postprocess(i): shutil.copy(env['CM_MLPERF_USER_CONF'], 'user.conf') result = mlperf_utils.get_result_from_log(env['CM_MLPERF_LAST_RELEASE'], model, scenario, output_dir, mode) + power = None + power_efficiency = None + if mode == "performance": + result_split = result.split(",") + if len(result_split) > 2: #power results are there + power = result_split[1] + power_efficiency = result_split[2] + if not state.get('CM_MLPERF_RESULTS'): state['CM_MLPERF_RESULTS'] = {} if not state['CM_MLPERF_RESULTS'].get(state['CM_SUT_CONFIG_NAME']): @@ -213,6 +221,11 @@ def postprocess(i): if not state['CM_MLPERF_RESULTS'][state['CM_SUT_CONFIG_NAME']][model].get(scenario): state['CM_MLPERF_RESULTS'][state['CM_SUT_CONFIG_NAME']][model][scenario] = {} state['CM_MLPERF_RESULTS'][state['CM_SUT_CONFIG_NAME']][model][scenario][mode] = result + if power: + state['CM_MLPERF_RESULTS'][state['CM_SUT_CONFIG_NAME']][model][scenario]['power'] = power + if power_efficiency: + state['CM_MLPERF_RESULTS'][state['CM_SUT_CONFIG_NAME']][model][scenario]['power_efficiency'] = power_efficiency + # Record basic host info host_info = { diff --git a/cm-mlops/script/get-ml-model-huggingface-zoo/README-extra.md b/cm-mlops/script/get-ml-model-huggingface-zoo/README-extra.md index 14279905a2..55304d1235 100644 --- a/cm-mlops/script/get-ml-model-huggingface-zoo/README-extra.md +++ b/cm-mlops/script/get-ml-model-huggingface-zoo/README-extra.md @@ -1,9 +1,5 @@ # Examples -```bash -cmr "get ml-model huggingface zoo _model-stub.alpindale/Llama-2-13b-ONNX" --model_filename=FP32/LlamaV2_13B_float32.onnx -``` - ```bash cmr "get ml-model huggingface zoo _model-stub.alpindale/Llama-2-13b-ONNX" 
--model_filename=FP32/LlamaV2_13B_float32.onnx --full_subfolder=FP32 ``` diff --git a/cm-mlops/script/get-mlperf-inference-utils/mlperf_utils.py b/cm-mlops/script/get-mlperf-inference-utils/mlperf_utils.py index 0a9ff3f3e6..4e7ccef56a 100644 --- a/cm-mlops/script/get-mlperf-inference-utils/mlperf_utils.py +++ b/cm-mlops/script/get-mlperf-inference-utils/mlperf_utils.py @@ -18,10 +18,12 @@ def get_result_from_log(version, model, scenario, result_path, mode): result = '' if mode == "performance": has_power = os.path.exists(os.path.join(result_path, "power")) - result = str(checker.get_performance_metric(config, mlperf_model, result_path, scenario, None, None, has_power)) + result_ = checker.get_performance_metric(config, mlperf_model, result_path, scenario, None, None, has_power) + result = str(round(result_, 3)) if has_power: - is_valid, power_metric, scenario, avg_power_efficiency = checker.get_power_metric(config, scenario, result_path, True, result) - result += f",{power_metric},{avg_power_efficiency*1000} " + is_valid, power_metric, scenario, avg_power_efficiency = checker.get_power_metric(config, scenario, result_path, True, result_) + result += f",{power_metric},{avg_power_efficiency}" + elif mode == "accuracy" and os.path.exists(os.path.join(result_path, 'accuracy.txt')): @@ -32,9 +34,10 @@ def get_result_from_log(version, model, scenario, result_path, mode): result = str(round(float(acc_results[acc]), 5)) else: result = '(' + result_list = [] for i, acc in enumerate(acc_results): - result += str(round(float(acc_results[acc]), 5)) - result += ")" + result_list.append(str(round(float(acc_results[acc]), 5))) + result += ", ".join(result_list) + ")" return result @@ -129,7 +132,7 @@ def get_result_string(version, model, scenario, result_path, has_power, sub_res) performance_result_ = performance_result / 1000000 #convert to milliseconds else: performance_result_ = performance_result - result['performance'] = performance_result_ + result['performance'] = 
round(performance_result_, 3) if scenario != effective_scenario: inferred, inferred_result = checker.get_inferred_result(scenario, effective_scenario, performance_result, mlperf_log, config, False) @@ -170,7 +173,7 @@ def get_result_string(version, model, scenario, result_path, has_power, sub_res) if len(accuracy_results) == 1: accuracy_result = accuracy_results[0] else: - accuracy_result = "(" + ",".join(accuracy_results)+")" + accuracy_result = "(" + ", ".join(accuracy_results)+")" result['accuracy'] = accuracy_result result_string = f"\n\n## Results\n" @@ -217,4 +220,5 @@ def get_result_table(results): if results[model][scenario].get('power_efficiency','') != '': row.append(results[model][scenario]['power_efficiency']) table.append(row) + return table, headers diff --git a/cm-mlops/script/gui/playground_scripts.py b/cm-mlops/script/gui/playground_scripts.py index d8f1daebd0..cd831ff8dd 100644 --- a/cm-mlops/script/gui/playground_scripts.py +++ b/cm-mlops/script/gui/playground_scripts.py @@ -129,6 +129,7 @@ def page(st, params): A few other popular commands: ```bash +cmr "{}" --help cmr "{}" --shell cm run script "{}" --shell cm docker script "{}"