diff --git a/cm-mlops/automation/list_of_scripts.md b/cm-mlops/automation/list_of_scripts.md deleted file mode 100644 index ac822e1db9..0000000000 --- a/cm-mlops/automation/list_of_scripts.md +++ /dev/null @@ -1,37 +0,0 @@ -[ [Back to index](README.md) ] - - - -This is an automatically generated list of reusable CM scripts being developed -by the [open taskforce on automation and reproducibility](https://github.com/mlcommons/ck/issues/536) -to make MLOps and DevOps tools more interoperable, portable, deterministic and reproducible. -These scripts suppport the community effort to modularize ML Systems and automate their bechmarking, optimization, -design space exploration and deployment across continuously changing software and hardware. - -# List of CM scripts by categories - -
-Click here to see the table of contents. - -* [Platform information](#platform-information) - - -
- -### Platform information - -* [detect-os](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/detect-os) - - -# List of all sorted CM scripts - -* [detect-os](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/detect-os) - - - - -# Maintainers - -* [Open MLCommons taskforce on automation and reproducibility](https://github.com/mlcommons/ck/blob/master/docs/taskforce.md)' diff --git a/cm-mlops/automation/script/module.py b/cm-mlops/automation/script/module.py index e7982a08a5..0a51cff7eb 100644 --- a/cm-mlops/automation/script/module.py +++ b/cm-mlops/automation/script/module.py @@ -30,7 +30,7 @@ def __init__(self, cmind, automation_file): self.run_state['deps'] = [] self.run_state['fake_deps'] = False self.run_state['parent'] = None - self.run_state['version_info'] = {} + self.run_state['version_info'] = [] self.file_with_cached_state = 'cm-cached-state.json' @@ -289,8 +289,8 @@ def run(self, i): if fake_deps: env['CM_TMP_FAKE_DEPS']='yes' run_state = i.get('run_state', self.run_state) - if run_state.get('version_info', '') == '': - run_state['version_info'] = {} + if not run_state.get('version_info', []): + run_state['version_info'] = [] if run_state.get('parent', '') == '': run_state['parent'] = None if fake_deps: @@ -643,7 +643,9 @@ def run(self, i): if i.get('help',False): return utils.call_internal_module(self, __file__, 'module_help', 'print_help', {'meta':meta, 'path':path}) - + run_state['script_id'] = meta['alias'] + "," + meta['uid'] + run_state['script_variation_tags'] = variation_tags + deps = meta.get('deps',[]) post_deps = meta.get('post_deps',[]) prehook_deps = meta.get('prehook_deps',[]) @@ -1314,6 +1316,8 @@ def run(self, i): utils.merge_dicts({'dict1':env, 'dict2':const, 'append_lists':True, 'append_unique':True}) utils.merge_dicts({'dict1':state, 'dict2':const_state, 'append_lists':True, 'append_unique':True}) + run_script_input['run_state'] = run_state + ii = copy.deepcopy(customize_common_input) ii['env'] = env ii['state'] = state @@ -1582,22 +1586,26 @@ def run(self, i): if not version and detected_version: version = detected_version + if version: script_uid = script_artifact.meta.get('uid') script_alias = script_artifact.meta.get('alias') script_tags = script_artifact.meta.get('tags') - tags = i.get('tags') - run_state['version_info'][script_uid] = {} - run_state['version_info'][script_uid]['alias'] = script_alias - run_state['version_info'][script_uid]['script_tags'] = script_tags - run_state['version_info'][script_uid]['variation_tags'] = variation_tags - run_state['version_info'][script_uid]['version'] = version - + version_info = {} + version_info_tags = ",".join(script_tags + variation_tags) + version_info[version_info_tags] = {} + version_info[version_info_tags]['script_uid'] = script_uid + version_info[version_info_tags]['script_alias'] = script_alias + version_info[version_info_tags]['version'] = version + version_info[version_info_tags]['parent'] = run_state['parent'] + run_state['version_info'].append(version_info) script_versions = detected_versions.get(meta['uid'], []) if not script_versions: detected_versions[meta['uid']] = [ version ] else: script_versions.append(version) + else: + pass # these scripts don't have versions. Should we use cm mlops version here? 
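##################################################################################
# Illustrative sketch (not part of the patch above): the shape of one entry in
# run_state['version_info'] after the change from a per-UID dict to a list.
# Each entry is a single-key dict keyed by the comma-joined script and variation
# tags, so repeated runs of a script with different variations keep separate
# records. The alias, UID and version below are hypothetical examples.

script_tags = ["detect", "os"]
variation_tags = []
version_info_tags = ",".join(script_tags + variation_tags)  # "detect,os"

version_info = {
    version_info_tags: {
        "script_uid": "863735b7db8c44fc",   # hypothetical UID
        "script_alias": "detect-os",
        "version": "2024.04",               # hypothetical detected version
        # 'parent' holds "alias,uid:variation_tags" of the calling script,
        # or None for the top-level script (see _run_deps below)
        "parent": None,
    }
}

run_state = {"version_info": []}
run_state["version_info"].append(version_info)  # the list preserves run order
##################################################################################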
############################# RETURN elapsed_time = time.time() - start_time @@ -1617,6 +1625,11 @@ def run(self, i): with open('readme.md', 'w') as f: f.write(readme) + if i.get('dump_version_info'): + r = self._dump_version_info_for_script() + if r['return'] > 0: + return r + rr = {'return':0, 'env':env, 'new_env':new_env, 'state':state, 'new_state':new_state, 'deps': run_state['deps']} if i.get('json', False) or i.get('j', False): @@ -1631,6 +1644,12 @@ def run(self, i): return rr + def _dump_version_info_for_script(self, output_dir = os.getcwd()): + import json + with open(os.path.join(output_dir, 'version_info.json'), 'w') as f: + f.write(json.dumps(self.run_state['version_info'], indent=2)) + return {'return': 0} + def _update_state_from_variations(self, i, meta, variation_tags, variations, env, state, deps, post_deps, prehook_deps, posthook_deps, new_env_keys_from_meta, new_state_keys_from_meta, add_deps_recursive, run_state, recursion_spaces, verbose): # Save current explicit variations @@ -2686,7 +2705,9 @@ def _run_deps(self, deps, clean_env_keys_deps, env, state, const, const_state, a tmp_run_state_deps = copy.deepcopy(run_state['deps']) run_state['deps'] = [] tmp_parent = run_state['parent'] - run_state['parent'] = self.meta['uid'] + run_state['parent'] = run_state['script_id']+":"+",".join(run_state['script_variation_tags']) + tmp_script_id = run_state['script_id'] + tmp_script_variation_tags = run_state['script_variation_tags'] # Run collective script via CM API: # Not very efficient but allows logging - can be optimized later @@ -2722,12 +2743,13 @@ def _run_deps(self, deps, clean_env_keys_deps, env, state, const, const_state, a run_state['deps'] = tmp_run_state_deps run_state['parent'] = tmp_parent + run_state['script_id'] = tmp_script_id + run_state['script_variation_tags'] = tmp_script_variation_tags # Restore local env env.update(tmp_env) update_env_with_values(env) - return {'return': 0} ############################################################################## @@ -3974,6 +3996,8 @@ def prepare_and_run_script_with_postprocessing(i, postprocess="postprocess"): verbose = i.get('verbose', False) if not verbose: verbose = i.get('v', False) + show_time = i.get('time', False) + recursion = i.get('recursion', False) found_script_tags = i.get('found_script_tags', []) debug_script_tags = i.get('debug_script_tags', '') @@ -4143,10 +4167,9 @@ def prepare_and_run_script_with_postprocessing(i, postprocess="postprocess"): if customize_code is not None: print (recursion_spaces+' ! 
call "{}" from {}'.format(postprocess, customize_code.__file__)) - if len(posthook_deps)>0 and (postprocess == "postprocess"): r = script_automation._call_run_deps(posthook_deps, local_env_keys, local_env_keys_from_meta, env, state, const, const_state, - add_deps_recursive, recursion_spaces, remembered_selections, variation_tags_string, found_cached, debug_script_tags, verbose, run_state) + add_deps_recursive, recursion_spaces, remembered_selections, variation_tags_string, found_cached, debug_script_tags, verbose, show_time, ' ', run_state) if r['return']>0: return r if (postprocess == "postprocess") and customize_code is not None and 'postprocess' in dir(customize_code): diff --git a/cm-mlops/automation/utils/module.py b/cm-mlops/automation/utils/module.py index 2b479d5362..45d8802283 100644 --- a/cm-mlops/automation/utils/module.py +++ b/cm-mlops/automation/utils/module.py @@ -878,3 +878,81 @@ def uid(self, i): return r + + ############################################################################## + def system(self, i): + """ + Run system command and redirect output to string. + + Args: + (CM input dict): + + * cmd (str): command line + * (path) (str): go to this directory and return back to current + * (stdout) (str): stdout file + * (stderr) (str): stderr file + + Returns: + (CM return dict): + + * return (int): return code == 0 if no error and >0 if error + * (error) (str): error string if return>0 + + * ret (int): return code + * std (str): stdout + stderr + * stdout (str): stdout + * stderr (str): stderr + """ + + cmd = i['cmd'] + + if cmd == '': + return {'return':1, 'error': 'cmd is empty'} + + path = i.get('path','') + if path!='' and os.path.isdir(path): + cur_dir = os.getcwd() + os.chdir(path) + + if i.get('stdout','')!='': + fn1=i['stdout'] + fn1_delete = False + else: + r = utils.gen_tmp_file({}) + if r['return'] > 0: return r + fn1 = r['file_name'] + fn1_delete = True + + if i.get('stderr','')!='': + fn2=i['stderr'] + fn2_delete = False + else: + r = utils.gen_tmp_file({}) + if r['return'] > 0: return r + fn2 = r['file_name'] + fn2_delete = True + + cmd += ' > '+fn1 + ' 2> '+fn2 + rx = os.system(cmd) + + std = '' + stdout = '' + stderr = '' + + if os.path.isfile(fn1): + r = utils.load_txt(file_name = fn1, remove_after_read = fn1_delete) + if r['return'] == 0: stdout = r['string'].strip() + + if os.path.isfile(fn2): + r = utils.load_txt(file_name = fn2, remove_after_read = fn2_delete) + if r['return'] == 0: stderr = r['string'].strip() + + std = stdout + if stderr!='': + if std!='': std+='\n' + std+=stderr + + if path!='' and os.path.isdir(path): + os.chdir(cur_dir) + + return {'return':0, 'ret':rx, 'stdout':stdout, 'stderr':stderr, 'std':std} diff --git a/cm-mlops/cfg/benchmark-hardware-compute/amd-gpu.json b/cm-mlops/cfg/benchmark-hardware-compute/amd-gpu.json index 546794e597..d9c3ae64ed 100644 --- a/cm-mlops/cfg/benchmark-hardware-compute/amd-gpu.json +++ b/cm-mlops/cfg/benchmark-hardware-compute/amd-gpu.json @@ -1,4 +1,5 @@ { "uid": "d8f06040f7294319", - "name": "AMD GPU" + "name": "AMD GPU", + "tags": "gpu,amd" } diff --git a/cm-mlops/cfg/benchmark-hardware-compute/generic-cpu-arm64.json b/cm-mlops/cfg/benchmark-hardware-compute/generic-cpu-arm64.json index 869c62397a..2f72d88103 100644 --- a/cm-mlops/cfg/benchmark-hardware-compute/generic-cpu-arm64.json +++ b/cm-mlops/cfg/benchmark-hardware-compute/generic-cpu-arm64.json @@ -1,4 +1,5 @@ { "uid":"357a972e79614903", - "name": "Generic CPU - Arm64" + "name": "Generic CPU - Arm64", + "tags": "cpu,arm64,generic" } diff 
--git a/cm-mlops/cfg/benchmark-hardware-compute/generic-cpu-x64.json b/cm-mlops/cfg/benchmark-hardware-compute/generic-cpu-x64.json index 05531cb49e..6573ca1a42 100644 --- a/cm-mlops/cfg/benchmark-hardware-compute/generic-cpu-x64.json +++ b/cm-mlops/cfg/benchmark-hardware-compute/generic-cpu-x64.json @@ -1,4 +1,5 @@ { "uid": "cdfd424c32734e38", - "name": "Generic CPU - x64" + "name": "Generic CPU - x64", + "tags": "cpu,x64,generic" } diff --git a/cm-mlops/cfg/benchmark-hardware-compute/google-tpu.json b/cm-mlops/cfg/benchmark-hardware-compute/google-tpu.json index 59296cc2de..dbcf9c70e9 100644 --- a/cm-mlops/cfg/benchmark-hardware-compute/google-tpu.json +++ b/cm-mlops/cfg/benchmark-hardware-compute/google-tpu.json @@ -1,4 +1,5 @@ { "uid": "b3be7ac9ef954f5a", - "name": "Google TPU" + "name": "Google TPU", + "tags": "tpu,google" } diff --git a/cm-mlops/cfg/benchmark-hardware-compute/nvidia-gpu-jetson-orin.json b/cm-mlops/cfg/benchmark-hardware-compute/nvidia-gpu-jetson-orin.json index cd15f38c9a..8c0f86f2d3 100644 --- a/cm-mlops/cfg/benchmark-hardware-compute/nvidia-gpu-jetson-orin.json +++ b/cm-mlops/cfg/benchmark-hardware-compute/nvidia-gpu-jetson-orin.json @@ -1,4 +1,5 @@ { "uid": "fe379ecd1e054a00", - "name": "Nvidia GPU - Jetson Orin" + "name": "Nvidia GPU - Jetson Orin", + "tags": "gpu,nvidia,jetson,orin" } diff --git a/cm-mlops/cfg/benchmark-hardware-compute/nvidia-gpu.json b/cm-mlops/cfg/benchmark-hardware-compute/nvidia-gpu.json index f8fecaf95b..2cdeb0eefa 100644 --- a/cm-mlops/cfg/benchmark-hardware-compute/nvidia-gpu.json +++ b/cm-mlops/cfg/benchmark-hardware-compute/nvidia-gpu.json @@ -1,4 +1,5 @@ { "uid": "fe379ecd1e054a00", - "name": "Nvidia GPU" + "name": "Nvidia GPU", + "tags": "gpu,nvidia" } diff --git a/cm-mlops/cfg/benchmark-hardware-compute/qualcomm-ai100.json b/cm-mlops/cfg/benchmark-hardware-compute/qualcomm-ai100.json index 83e18562f4..33b3ac8abd 100644 --- a/cm-mlops/cfg/benchmark-hardware-compute/qualcomm-ai100.json +++ b/cm-mlops/cfg/benchmark-hardware-compute/qualcomm-ai100.json @@ -1,4 +1,5 @@ { - "uid": "fe379ecd1e054a00", - "name": "Qualcomm - AI 100" + "uid": "d2ae645066664463", + "name": "Qualcomm - AI 100", + "tags": "accelerator,acc,qualcomm,ai,100,ai-100" } diff --git a/cm-mlops/cfg/benchmark-run-loadgen-cpp-dev/any-model-offline.yaml b/cm-mlops/cfg/benchmark-run-loadgen-cpp-dev/any-model-offline.yaml deleted file mode 100644 index 6f2d290046..0000000000 --- a/cm-mlops/cfg/benchmark-run-loadgen-cpp-dev/any-model-offline.yaml +++ /dev/null @@ -1,7 +0,0 @@ -uid: 125abafe58dc4473 - -name: "Any model - offline" - -supported_compute: -- cdfd424c32734e38 -- 357a972e79614903 diff --git a/cm-mlops/cfg/benchmark-run-loadgen-cpp-dev/any-model-offline.md b/cm-mlops/cfg/benchmark-run-loadgen-cpp-dev/any-model-x64-offline.md similarity index 100% rename from cm-mlops/cfg/benchmark-run-loadgen-cpp-dev/any-model-offline.md rename to cm-mlops/cfg/benchmark-run-loadgen-cpp-dev/any-model-x64-offline.md diff --git a/cm-mlops/cfg/benchmark-run-loadgen-cpp-dev/any-model-x64-offline.yaml b/cm-mlops/cfg/benchmark-run-loadgen-cpp-dev/any-model-x64-offline.yaml new file mode 100644 index 0000000000..1d848ec00a --- /dev/null +++ b/cm-mlops/cfg/benchmark-run-loadgen-cpp-dev/any-model-x64-offline.yaml @@ -0,0 +1,5 @@ +uid: 125abafe58dc4473 + +name: "Any model - x64 - offline" + +compute_uid: cdfd424c32734e38 diff --git a/cm-mlops/cfg/benchmark-run-loadgen-python-dev/any-model-offline.yaml b/cm-mlops/cfg/benchmark-run-loadgen-python-dev/any-model-offline.yaml index 
aefd1822f4..677ce45f05 100644 --- a/cm-mlops/cfg/benchmark-run-loadgen-python-dev/any-model-offline.yaml +++ b/cm-mlops/cfg/benchmark-run-loadgen-python-dev/any-model-offline.yaml @@ -2,6 +2,4 @@ uid: db45dcd686854602 name: "Any model - offline" -supported_compute: -- cdfd424c32734e38 -- 357a972e79614903 +compute_uid: cdfd424c32734e38 diff --git a/cm-mlops/cfg/benchmark-run-mlperf-abtf-dev/retinanet-reference-python-torch-offline.yaml b/cm-mlops/cfg/benchmark-run-mlperf-abtf-dev/retinanet-reference-python-torch-offline.yaml index 55bbec3719..f1fe61593f 100644 --- a/cm-mlops/cfg/benchmark-run-mlperf-abtf-dev/retinanet-reference-python-torch-offline.yaml +++ b/cm-mlops/cfg/benchmark-run-mlperf-abtf-dev/retinanet-reference-python-torch-offline.yaml @@ -2,5 +2,4 @@ uid: "fe379ecd1e054a00" name: "RetinaNet Reference Python Torch Offline" -supported_compute: -- cdfd424c32734e38 +compute_uid: cdfd424c32734e38 diff --git a/cm-mlops/cfg/benchmark-run-mlperf-inference-latest/_cm.yaml b/cm-mlops/cfg/benchmark-run-mlperf-inference-latest/_cm.yaml index 11c0c31277..3793099289 100644 --- a/cm-mlops/cfg/benchmark-run-mlperf-inference-latest/_cm.yaml +++ b/cm-mlops/cfg/benchmark-run-mlperf-inference-latest/_cm.yaml @@ -16,6 +16,7 @@ name: "MLPerf inference - latest" supported_compute: - 357a972e79614903 - cdfd424c32734e38 +- d2ae645066664463 urls: - name: "Official page" @@ -24,3 +25,11 @@ urls: url: "https://github.com/mlcommons/inference" - name: "MLCommons CM automation (under development)" url: "https://github.com/mlcommons/ck/tree/master/docs/mlperf/inference" + +dimensions: +- - input.model + - "MLPerf model" +- - input.implementation + - "MLPerf implementation" +- - input.framework + - "MLPerf framework" diff --git a/cm-mlops/cfg/benchmark-run-mlperf-inference-scc23/_cm.yaml b/cm-mlops/cfg/benchmark-run-mlperf-inference-scc23/_cm.yaml new file mode 100644 index 0000000000..1f0c1a9ec0 --- /dev/null +++ b/cm-mlops/cfg/benchmark-run-mlperf-inference-scc23/_cm.yaml @@ -0,0 +1,26 @@ +alias: benchmark-run-mlperf-inference-scc23 +uid: 9133e5b1dddc4e4a + +automation_alias: cfg +automation_uid: 88dce9c160324c5d + +tags: +- benchmark +- run +- mlperf +- inference +- v3.1 + +name: "MLPerf inference - Student Cluster Competition 2023" + +supported_compute: +- fe379ecd1e054a00 +- cdfd424c32734e38 +- fe379ecd1e054a00 +- d2ae645066664463 + +urls: +- name: "Official page" + url: "https://sc23.supercomputing.org/students/student-cluster-competition/" +- name: "Tutorial to run MLPerf inference benchmark " + url: "https://github.com/mlcommons/ck/blob/master/docs/tutorials/scc23-mlperf-inference-bert.md" diff --git a/cm-mlops/cfg/benchmark-run-mlperf-inference-scc23/base/_test.yaml b/cm-mlops/cfg/benchmark-run-mlperf-inference-scc23/base/_test.yaml new file mode 100644 index 0000000000..2869ed71b6 --- /dev/null +++ b/cm-mlops/cfg/benchmark-run-mlperf-inference-scc23/base/_test.yaml @@ -0,0 +1,3 @@ +name: "BASE" + +tags: "base" diff --git a/cm-mlops/cfg/benchmark-run-mlperf-inference-scc23/bert-reference-python-onnx-offline.md b/cm-mlops/cfg/benchmark-run-mlperf-inference-scc23/bert-reference-python-onnx-offline.md new file mode 100644 index 0000000000..a0990367ef --- /dev/null +++ b/cm-mlops/cfg/benchmark-run-mlperf-inference-scc23/bert-reference-python-onnx-offline.md @@ -0,0 +1 @@ +TBD diff --git a/cm-mlops/cfg/benchmark-run-mlperf-inference-scc23/bert-reference-python-onnx-offline.yaml b/cm-mlops/cfg/benchmark-run-mlperf-inference-scc23/bert-reference-python-onnx-offline.yaml new file mode 100644 index 
0000000000..fdc291b8be --- /dev/null +++ b/cm-mlops/cfg/benchmark-run-mlperf-inference-scc23/bert-reference-python-onnx-offline.yaml @@ -0,0 +1,11 @@ +uid: 35e8895a1b714ed3 + +name: "BERT Reference Python ONNX Offline" + +compute_uid: cdfd424c32734e38 + +input: + model: bert + implementation: reference + framework: onnx + diff --git a/cm-mlops/cfg/benchmark-run-mlperf-inference-v3.1/_cm.yaml b/cm-mlops/cfg/benchmark-run-mlperf-inference-v3.1/_cm.yaml index f59559bcd8..c174ee5aa5 100644 --- a/cm-mlops/cfg/benchmark-run-mlperf-inference-v3.1/_cm.yaml +++ b/cm-mlops/cfg/benchmark-run-mlperf-inference-v3.1/_cm.yaml @@ -17,6 +17,7 @@ supported_compute: - fe379ecd1e054a00 - cdfd424c32734e38 - fe379ecd1e054a00 +- d2ae645066664463 urls: - name: "Official page" @@ -25,3 +26,13 @@ urls: url: "https://github.com/mlcommons/inference" - name: "MLCommons CM automation (under development)" url: "https://github.com/mlcommons/ck/tree/master/docs/mlperf/inference" + +dimensions: +- - input.model + - "MLPerf model" +- - input.implementation + - "MLPerf implementation" +- - input.framework + - "MLPerf framework" +- - compute_meta.name + - "Compute" diff --git a/cm-mlops/cfg/benchmark-run-mlperf-inference-v3.1/base/_demo.yaml b/cm-mlops/cfg/benchmark-run-mlperf-inference-v3.1/base/_demo.yaml new file mode 100644 index 0000000000..6c2c3145cd --- /dev/null +++ b/cm-mlops/cfg/benchmark-run-mlperf-inference-v3.1/base/_demo.yaml @@ -0,0 +1,2 @@ +# DEMO +base_demo: true diff --git a/cm-mlops/cfg/benchmark-run-mlperf-inference-v3.1/bert-qaic-offline.md b/cm-mlops/cfg/benchmark-run-mlperf-inference-v3.1/bert-qaic-offline.md new file mode 100644 index 0000000000..a0990367ef --- /dev/null +++ b/cm-mlops/cfg/benchmark-run-mlperf-inference-v3.1/bert-qaic-offline.md @@ -0,0 +1 @@ +TBD diff --git a/cm-mlops/cfg/benchmark-run-mlperf-inference-v3.1/bert-qaic-offline.yaml b/cm-mlops/cfg/benchmark-run-mlperf-inference-v3.1/bert-qaic-offline.yaml new file mode 100644 index 0000000000..2cd71103c7 --- /dev/null +++ b/cm-mlops/cfg/benchmark-run-mlperf-inference-v3.1/bert-qaic-offline.yaml @@ -0,0 +1,14 @@ +uid: c6ae695138e74a29 + +name: "BERT QAIC Offline" + +compute_uid: cdfd424c32734e38 + +input: + model: bert + implementation: qaic + framework: qaic + +functional: true + +notes: "Notes" \ No newline at end of file diff --git a/cm-mlops/cfg/benchmark-run-mlperf-inference-v3.1/bert-reference-python-onnx-offline.md b/cm-mlops/cfg/benchmark-run-mlperf-inference-v3.1/bert-reference-python-onnx-offline.md new file mode 100644 index 0000000000..a0990367ef --- /dev/null +++ b/cm-mlops/cfg/benchmark-run-mlperf-inference-v3.1/bert-reference-python-onnx-offline.md @@ -0,0 +1 @@ +TBD diff --git a/cm-mlops/cfg/benchmark-run-mlperf-inference-v3.1/bert-reference-python-onnx-offline.yaml b/cm-mlops/cfg/benchmark-run-mlperf-inference-v3.1/bert-reference-python-onnx-offline.yaml index b3a9f0dd1c..a8a93eb7a0 100644 --- a/cm-mlops/cfg/benchmark-run-mlperf-inference-v3.1/bert-reference-python-onnx-offline.yaml +++ b/cm-mlops/cfg/benchmark-run-mlperf-inference-v3.1/bert-reference-python-onnx-offline.yaml @@ -2,7 +2,12 @@ uid: 9eee8cb06621413a name: "BERT Reference Python ONNX Offline" -supported_compute: -- cdfd424c32734e38 -- 357a972e79614903 -- fe379ecd1e054a00 +compute_uid: cdfd424c32734e38 + +input: + model: bert + implementation: reference + framework: onnx + +functional: false + diff --git a/cm-mlops/cfg/benchmark-run-mlperf-inference-v3.1/gptj-reference-python-torch-offline.md 
b/cm-mlops/cfg/benchmark-run-mlperf-inference-v3.1/gptj-reference-python-torch-offline.md new file mode 100644 index 0000000000..a0990367ef --- /dev/null +++ b/cm-mlops/cfg/benchmark-run-mlperf-inference-v3.1/gptj-reference-python-torch-offline.md @@ -0,0 +1 @@ +TBD diff --git a/cm-mlops/cfg/benchmark-run-mlperf-inference-v3.1/gptj-reference-python-torch-offline.yaml b/cm-mlops/cfg/benchmark-run-mlperf-inference-v3.1/gptj-reference-python-torch-offline.yaml index e3cc190aaf..7cc38c8058 100644 --- a/cm-mlops/cfg/benchmark-run-mlperf-inference-v3.1/gptj-reference-python-torch-offline.yaml +++ b/cm-mlops/cfg/benchmark-run-mlperf-inference-v3.1/gptj-reference-python-torch-offline.yaml @@ -1,6 +1,17 @@ +_base: "base/_demo.yaml" + uid: 53e4028a3b31400d -name: "GPT-J Reference Python Torch Offline" +name: "GPT-J Reference Python Torch Offline" + +compute_uid: cdfd424c32734e38 + +tags: ",offline" + +input: + model: gptj + implementation: reference + framework: torch -supported_compute: -- fe379ecd1e054a00 +reproduced: true + \ No newline at end of file diff --git a/cm-mlops/cfg/benchmark-run-mlperf-inference-v4.0/_cm.yaml b/cm-mlops/cfg/benchmark-run-mlperf-inference-v4.0/_cm.yaml index 2be6c36da5..522730bf2a 100644 --- a/cm-mlops/cfg/benchmark-run-mlperf-inference-v4.0/_cm.yaml +++ b/cm-mlops/cfg/benchmark-run-mlperf-inference-v4.0/_cm.yaml @@ -17,6 +17,7 @@ supported_compute: - fe379ecd1e054a00 - cdfd424c32734e38 - fe379ecd1e054a00 +- d2ae645066664463 urls: - name: "Official page" @@ -25,3 +26,11 @@ urls: url: "https://github.com/mlcommons/inference" - name: "MLCommons CM automation (under development)" url: "https://github.com/mlcommons/ck/tree/master/docs/mlperf/inference" + +dimensions: +- - input.model + - "MLPerf model" +- - input.implementation + - "MLPerf implementation" +- - input.framework + - "MLPerf framework" diff --git a/cm-mlops/cfg/benchmark-run-mlperf-inference-v4.0/llama2-reference-python-torch-offline.md b/cm-mlops/cfg/benchmark-run-mlperf-inference-v4.0/llama2-reference-python-torch-offline.md new file mode 100644 index 0000000000..6b81d9cd00 --- /dev/null +++ b/cm-mlops/cfg/benchmark-run-mlperf-inference-v4.0/llama2-reference-python-torch-offline.md @@ -0,0 +1 @@ +# TBD \ No newline at end of file diff --git a/cm-mlops/cfg/benchmark-run-mlperf-inference-v4.0/llama2-reference-python-torch-offline.yaml b/cm-mlops/cfg/benchmark-run-mlperf-inference-v4.0/llama2-reference-python-torch-offline.yaml index 831aa0347b..63653a9633 100644 --- a/cm-mlops/cfg/benchmark-run-mlperf-inference-v4.0/llama2-reference-python-torch-offline.yaml +++ b/cm-mlops/cfg/benchmark-run-mlperf-inference-v4.0/llama2-reference-python-torch-offline.yaml @@ -1,6 +1,9 @@ +_base: "benchmark-run-mlperf-inference-v3.1,8eb42e27ec984185:base/_demo.yaml" + uid: 4df38ed8dd804678 -name: "LLAMA2 Reference Python Torch Offline" +name: "LLAMA2 Reference Python Torch Offline" + +compute_uid: fe379ecd1e054a00 -supported_compute: -- fe379ecd1e054a00 +tags: ",llama2,offline" diff --git a/cm-mlops/script/app-mlperf-inference-cpp/_cm.yaml b/cm-mlops/script/app-mlperf-inference-cpp/_cm.yaml index b1cd539b0c..81348987e0 100644 --- a/cm-mlops/script/app-mlperf-inference-cpp/_cm.yaml +++ b/cm-mlops/script/app-mlperf-inference-cpp/_cm.yaml @@ -151,6 +151,9 @@ post_deps: CM_MLPERF_SKIP_RUN: - "yes" + - tags: save,mlperf,inference,state + names: + - save-mlperf-inference-state # Variations to customize dependencies variations: diff --git a/cm-mlops/script/app-mlperf-inference-cpp/customize.py b/cm-mlops/script/app-mlperf-inference-cpp/customize.py index 
aeca71a9b1..bd4910b26d 100644 --- a/cm-mlops/script/app-mlperf-inference-cpp/customize.py +++ b/cm-mlops/script/app-mlperf-inference-cpp/customize.py @@ -82,28 +82,6 @@ def preprocess(i): def postprocess(i): env = i['env'] - if env.get('CM_MLPERF_README', '') == "yes": - import cmind as cm - inp = i['input'] - state = i['state'] - script_tags = inp['tags'] - script_adr = inp.get('add_deps_recursive', inp.get('adr', {})) - - cm_input = {'action': 'run', - 'automation': 'script', - 'tags': script_tags, - 'adr': script_adr, - 'print_deps': True, - 'env': env, - 'quiet': True, - 'silent': True, - 'fake_run': True - } - r = cm.access(cm_input) - if r['return'] > 0: - return r - - state['mlperf-inference-implementation'] = {} - state['mlperf-inference-implementation']['print_deps'] = r['new_state']['print_deps'] + state = i['state'] return {'return':0} diff --git a/cm-mlops/script/app-mlperf-inference-reference/_cm.yaml b/cm-mlops/script/app-mlperf-inference-reference/_cm.yaml index 31dc6ed7ea..0e617ad49f 100644 --- a/cm-mlops/script/app-mlperf-inference-reference/_cm.yaml +++ b/cm-mlops/script/app-mlperf-inference-reference/_cm.yaml @@ -574,7 +574,10 @@ posthook_deps: CM_MLPERF_SKIP_RUN: - "on" - +post_deps: + - tags: save,mlperf,inference,state + names: + - save-mlperf-inference-state # Variations to customize dependencies variations: diff --git a/cm-mlops/script/app-mlperf-inference-reference/customize.py b/cm-mlops/script/app-mlperf-inference-reference/customize.py index c0563ec0d6..f425411d77 100644 --- a/cm-mlops/script/app-mlperf-inference-reference/customize.py +++ b/cm-mlops/script/app-mlperf-inference-reference/customize.py @@ -195,7 +195,9 @@ def get_run_cmd_reference(os_info, env, scenario_extra_options, mode_extra_optio if os_info['platform'] == 'windows': cmd = "python python/main.py --profile "+env['CM_MODEL']+"-"+env['CM_MLPERF_BACKEND'] + \ " --model=" + env['CM_ML_MODEL_FILE_WITH_PATH'] + ' --dataset-path=' + env['CM_DATASET_PREPROCESSED_PATH'] + \ - " --scenario " + env['CM_MLPERF_LOADGEN_SCENARIO'] + " " + env['CM_MLPERF_LOADGEN_EXTRA_OPTIONS'] + \ + " --scenario " + env['CM_MLPERF_LOADGEN_SCENARIO'] + " " + \ + " --output " + env['OUTPUT_DIR'] + " " + \ + env['CM_MLPERF_LOADGEN_EXTRA_OPTIONS'] + \ scenario_extra_options + mode_extra_options + dataset_options else: cmd = "./run_local.sh " + env['CM_MLPERF_BACKEND'] + ' ' + \ @@ -359,29 +361,8 @@ def get_run_cmd_reference(os_info, env, scenario_extra_options, mode_extra_optio def postprocess(i): env = i['env'] + state = i['state'] - if env.get('CM_MLPERF_README', "") == "yes": - import cmind as cm - inp = i['input'] - state = i['state'] - script_tags = inp['tags'] - script_adr = inp.get('add_deps_recursive', inp.get('adr', {})) - - cm_input = {'action': 'run', - 'automation': 'script', - 'tags': script_tags, - 'adr': script_adr, - 'env': env, - 'print_deps': True, - 'quiet': True, - 'silent': True, - 'fake_run': True - } - r = cm.access(cm_input) - if r['return'] > 0: - return r - - state['mlperf-inference-implementation'] = {} - state['mlperf-inference-implementation']['print_deps'] = r['new_state']['print_deps'] + inp = i['input'] return {'return':0} diff --git a/cm-mlops/script/app-mlperf-inference-tflite-cpp/_cm.json b/cm-mlops/script/app-mlperf-inference-tflite-cpp/_cm.json index d43c46d724..318900b7dd 100644 --- a/cm-mlops/script/app-mlperf-inference-tflite-cpp/_cm.json +++ b/cm-mlops/script/app-mlperf-inference-tflite-cpp/_cm.json @@ -252,6 +252,12 @@ "yes" ] } + }, + { + "tags": "save,mlperf,inference,state", + 
"names": [ + "save-mlperf-inference-state" + ] } ], "tags": [ diff --git a/cm-mlops/script/app-mlperf-inference-tflite-cpp/customize.py b/cm-mlops/script/app-mlperf-inference-tflite-cpp/customize.py index 9ff3d920b0..ebd588c9f2 100644 --- a/cm-mlops/script/app-mlperf-inference-tflite-cpp/customize.py +++ b/cm-mlops/script/app-mlperf-inference-tflite-cpp/customize.py @@ -91,28 +91,6 @@ def preprocess(i): def postprocess(i): env = i['env'] - if env.get('CM_MLPERF_README', '') == "yes": - import cmind as cm - inp = i['input'] - state = i['state'] - script_tags = inp['tags'] - script_adr = inp.get('add_deps_recursive', inp.get('adr', {})) - - cm_input = {'action': 'run', - 'automation': 'script', - 'tags': script_tags, - 'adr': script_adr, - 'env': env, - 'print_deps': True, - 'quiet': True, - 'silent': True, - 'fake_run': True - } - r = cm.access(cm_input) - if r['return'] > 0: - return r - - state['mlperf-inference-implementation'] = {} - state['mlperf-inference-implementation']['print_deps'] = r['new_state']['print_deps'] + state = i['state'] return {'return':0} diff --git a/cm-mlops/script/app-mlperf-inference/_cm.yaml b/cm-mlops/script/app-mlperf-inference/_cm.yaml index cb31003de3..65ac2aace3 100644 --- a/cm-mlops/script/app-mlperf-inference/_cm.yaml +++ b/cm-mlops/script/app-mlperf-inference/_cm.yaml @@ -245,6 +245,10 @@ variations: backend: onnxruntime nvidia-original: + docker: + extra_run_args: ' --runtime=nvidia --ulimit memlock=-1 --cap-add SYS_ADMIN --cap-add SYS_TIME --security-opt apparmor=unconfined --security-opt seccomp=unconfined' + base_image: nvcr.io/nvidia/mlperf/mlperf-inference:mlpinf-v3.1-cuda12.2-cudnn8.9-x86_64-ubuntu20.04-l4-public + docker:os_version: "20.04" default_variations: backend: tensorrt device: cuda @@ -1128,3 +1132,19 @@ input_description: gui: title: "CM GUI for the MLPerf inference benchmark" + +docker: + skip_run_cmd: 'no' + shm_size: '32gb' + extra_run_args: ' --ulimit memlock=-1 --cap-add SYS_ADMIN --cap-add SYS_TIME --security-opt apparmor=unconfined --security-opt seccomp=unconfined' + docker_os: ubuntu + docker_real_run: False + interactive: True + docker_os_version: '22.04' + docker_input_mapping: + imagenet_path: IMAGENET_PATH + gptj_checkpoint_path: GPTJ_CHECKPOINT_PATH + criteo_preprocessed_path: CRITEO_PREPROCESSED_PATH + results_dir: RESULTS_DIR + submission_dir: SUBMISSION_DIR + dlrm_data_path: DLRM_DATA_PATH diff --git a/cm-mlops/script/app-mlperf-inference/customize.py b/cm-mlops/script/app-mlperf-inference/customize.py index 21b95b67d2..d716e1d94f 100644 --- a/cm-mlops/script/app-mlperf-inference/customize.py +++ b/cm-mlops/script/app-mlperf-inference/customize.py @@ -1,14 +1,18 @@ from cmind import utils + import os import json import shutil import subprocess import copy import cmind as cm +import platform +import sys def preprocess(i): env = i['env'] + state = i['state'] if env.get('CM_MLPERF_IMPLEMENTATION', '') == 'nvidia-original': if env.get('CM_NVIDIA_GPU_NAME', '') in [ "rtx_4090", "a100", "t4", "l4", "orin", "custom" ]: @@ -21,12 +25,20 @@ def preprocess(i): env['CM_NVIDIA_HARNESS_GPU_VARIATION'] = '' if 'cmd' in i['input']: - i['state']['mlperf_inference_run_cmd'] = "cm run script " + " ".join(i['input']['cmd']) + state['mlperf_inference_run_cmd'] = "cm run script " + " ".join(i['input']['cmd']) + + state['mlperf-inference-implementation'] = {} + + run_state = i['run_script_input']['run_state'] + state['mlperf-inference-implementation']['script_id'] = run_state['script_id']+":"+",".join(run_state['script_variation_tags']) 
return {'return':0} def postprocess(i): + os_info = i['os_info'] + + xsep = '^' if os_info['platform'] == 'windows' else '\\' env = i['env'] inp = i['input'] @@ -191,16 +203,124 @@ def postprocess(i): shutil.copy(env['CM_MLPERF_USER_CONF'], 'user.conf') + # Record basic host info + host_info = { + "os_version":platform.platform(), + "cpu_version":platform.processor(), + "python_version":sys.version, + "cm_version":cm.__version__ + } + + x = '' + if env.get('CM_HOST_OS_FLAVOR','')!='': x+=env['CM_HOST_OS_FLAVOR'] + if env.get('CM_HOST_OS_VERSION','')!='': x+=' '+env['CM_HOST_OS_VERSION'] + if x!='': host_info['os_version_sys'] = x + + if env.get('CM_HOST_SYSTEM_NAME','')!='': host_info['system_name']=env['CM_HOST_SYSTEM_NAME'] + + # Check CM automation repository + repo_name = 'mlcommons@ck' + repo_hash = '' + r = cm.access({'action':'find', 'automation':'repo', 'artifact':'mlcommons@ck,a4705959af8e447a'}) + if r['return']==0 and len(r['list'])==1: + repo_path = r['list'][0].path + if os.path.isdir(repo_path): + repo_name = os.path.basename(repo_path) + + # Check Grigori's dev + if repo_name == 'ck': repo_name = 'ctuning@mlcommons-ck' + + r = cm.access({'action':'system', + 'automation':'utils', + 'path':repo_path, + 'cmd':'git rev-parse HEAD'}) + if r['return'] == 0 and r['ret'] == 0: + repo_hash = r['stdout'] + + host_info['cm_repo_name'] = repo_name + host_info['cm_repo_git_hash'] = repo_hash + + # Check a few important MLCommons repos + xhashes = [] + md_xhashes = '' + + for x in [('get,git,inference', ['inference']), + ('get,git,mlperf,power', ['power-dev'])]: + xtags = x[0] + xdirs = x[1] + + rx = cm.access({'action':'find', 'automation':'cache', 'tags':xtags}) + if rx['return']>0: return rx + for cache in rx['list']: + xurl = '' + xhash = '' + + for xd in xdirs: + xpath = os.path.join(cache.path, xd) + print (xpath) + if os.path.isdir(xpath): + r = cm.access({'action':'system', 'automation':'utils', 'path':xpath, 'cmd':'git rev-parse HEAD'}) + if r['return'] == 0 and r['ret'] == 0: + xhash = r['stdout'] + + r = cm.access({'action':'system', 'automation':'utils', 'path':xpath, 'cmd':'git config --get remote.origin.url'}) + if r['return'] == 0 and r['ret'] == 0: + xurl = r['stdout'] + + if xurl!='' and xhash!='': + break + + if xurl!='' and xhash!='': + # Check if doesn't exist + found = False + + for xh in xhashes: + if xh['mlcommons_git_url'] == xurl and xh['mlcommons_git_hash'] == xhash: + found = True + break + + if not found: + xhashes.append({'mlcommons_git_url': xurl, + 'mlcommons_git_hash': xhash, + 'cm_cache_tags':cache.meta['tags']}) + + md_xhashes +='* MLCommons Git {} ({})\n'.format(xurl, xhash) + + if len(xhashes)>0: + host_info['mlcommons_repos'] = xhashes + + with open ("cm-host-info.json", "w") as fp: + fp.write(json.dumps(host_info, indent=2)+'\n') + + # Prepare README if "cmd" in inp: cmd = "cm run script \\\n\t"+" \\\n\t".join(inp['cmd']) + xcmd = "cm run script "+xsep+"\n\t" + (" "+xsep+"\n\t").join(inp['cmd']) else: cmd = "" + xcmd = "" + + readme_init = "This experiment is generated using the [MLCommons Collective Mind automation framework (CM)](https://github.com/mlcommons/ck).\n\n" + + readme_init+= "*Check [CM MLPerf docs](https://github.com/mlcommons/ck/tree/master/docs/mlperf) for more details.*\n\n" - readme_init = "This experiment is generated using [MLCommons CM](https://github.com/mlcommons/ck)\n" - readme_body = "## CM Run Command\n```\n" + cmd + "\n```" + readme_body = "## Host platform\n\n* OS version: {}\n* CPU version: {}\n* Python version: {}\n* 
MLCommons CM version: {}\n{}\n\n".format(platform.platform(), + platform.processor(), sys.version, cm.__version__, md_xhashes) + x = repo_name + if repo_hash!='': x+=' --checkout='+str(repo_hash) + + readme_body += "## CM Run Command\n\nSee [CM installation guide](https://github.com/mlcommons/ck/blob/master/docs/installation.md).\n\n"+ \ + "```bash\npip install cmind\n\ncm rm cache -f\n\ncm pull repo {}\n\n{}\n```".format(x, xcmd) + + readme_body += "\n*Note that if you want to use the [latest automation recipes](https://access.cknowledge.org/playground/?action=scripts) for MLPerf (CM scripts),\n"+ \ + " you should simply reload {} without checkout and clean CM cache as follows:*\n\n".format(repo_name) + \ + "```bash\ncm rm repo {}\ncm pull repo {}\ncm rm cache -f\n\n```".format(repo_name, repo_name) + + extra_readme_init = '' + extra_readme_body = '' if env.get('CM_MLPERF_README', '') == "yes": - readme_body += "\n## Dependent CM scripts \n" + extra_readme_body += "\n## Dependent CM scripts\n\n" script_tags = inp['tags'] script_adr = inp.get('adr', {}) @@ -222,23 +342,26 @@ def postprocess(i): print_deps = r['new_state']['print_deps'] count = 1 for dep in print_deps: - readme_body += "\n\n" + str(count) +". `" +dep+ "`\n" + extra_readme_body += "\n\n" + str(count) +". `" +dep+ "`\n" count = count+1 if state.get('mlperf-inference-implementation') and state['mlperf-inference-implementation'].get('print_deps'): - readme_body += "\n## Dependent CM scripts for the MLPerf Inference Implementation\n" + extra_readme_body += "\n## Dependent CM scripts for the MLPerf Inference Implementation\n" print_deps = state['mlperf-inference-implementation']['print_deps'] count = 1 for dep in print_deps: - readme_body += "\n\n" + str(count) +". `" +dep+"`\n" + extra_readme_body += "\n\n" + str(count) +". 
`" +dep+"`\n" count = count+1 readme = readme_init + readme_body + extra_readme = extra_readme_init + extra_readme_body with open ("README.md", "w") as fp: fp.write(readme) + with open ("README-extra.md", "w") as fp: + fp.write(extra_readme) elif mode == "compliance": @@ -307,4 +430,32 @@ def postprocess(i): if accuracy_result_dir != '': env['CM_MLPERF_ACCURACY_RESULTS_DIR'] = accuracy_result_dir + if state.get('mlperf-inference-implementation') and state['mlperf-inference-implementation'].get('version_info'): + with open(os.path.join(output_dir, "cm-version-info.json"), "w") as f: + f.write(json.dumps(state['mlperf-inference-implementation']['version_info'], indent=2)) + + if env.get('CM_DUMP_SYSTEM_INFO', True): + dump_script_output("detect,os", env, state, 'new_env', os.path.join(output_dir, "os_info.json")) + dump_script_output("detect,cpu", env, state, 'new_env', os.path.join(output_dir, "cpu_info.json")) + dump_script_output("dump,pip,freeze", env, state, 'new_state', os.path.join(output_dir, "pip_freeze.json")) + return {'return':0} + +def dump_script_output(script_tags, env, state, output_key, dump_file): + + cm_input = {'action': 'run', + 'automation': 'script', + 'tags': script_tags, + 'env': env, + 'state': state, + 'quiet': True, + 'silent': True, + } + r = cm.access(cm_input) + if r['return'] > 0: + return r + with open(dump_file, "w") as f: + f.write(json.dumps(r[output_key], indent=2)) + + return {'return': 0} + diff --git a/cm-mlops/script/benchmark-program/customize.py b/cm-mlops/script/benchmark-program/customize.py index 2593ebbac4..b4bb9e3f40 100644 --- a/cm-mlops/script/benchmark-program/customize.py +++ b/cm-mlops/script/benchmark-program/customize.py @@ -34,12 +34,17 @@ def preprocess(i): if env.get('CM_RUN_DIR','') == '': env['CM_RUN_DIR'] = os.getcwd() + env['CM_RUN_CMD'] = CM_RUN_PREFIX + ' ' + os.path.join(env['CM_RUN_DIR'],env['CM_BIN_NAME']) + ' ' + env['CM_RUN_SUFFIX'] x = env.get('CM_RUN_PREFIX0','') if x!='': env['CM_RUN_CMD'] = x + ' ' + env.get('CM_RUN_CMD','') + if env.get('CM_HOST_OS_TYPE', '') != 'windows' and str(env.get('CM_SAVE_CONSOLE_LOG', True)).lower() not in [ "no", "false", "0"]: + logs_dir = env.get('CM_LOGS_DIR', env['CM_RUN_DIR']) + env['CM_RUN_CMD'] += " 2>&1 | tee " + os.path.join(logs_dir, "console.out") + # Print info print ('***************************************************************************') print ('CM script::benchmark-program/run.sh') diff --git a/cm-mlops/script/dump-pip-freeze/_cm.yaml b/cm-mlops/script/dump-pip-freeze/_cm.yaml new file mode 100644 index 0000000000..39acd5eee3 --- /dev/null +++ b/cm-mlops/script/dump-pip-freeze/_cm.yaml @@ -0,0 +1,16 @@ +alias: dump-pip-freeze +automation_alias: script +automation_uid: 5b4e0237da074764 +cache: false +tags: +- dump +- pip +- freeze +new_state_keys: + - pip_freeze +deps: + - tags: get,python + names: + - python + - python3 +uid: 33eb0a8006664cae diff --git a/cm-mlops/script/dump-pip-freeze/customize.py b/cm-mlops/script/dump-pip-freeze/customize.py new file mode 100644 index 0000000000..aef0a981df --- /dev/null +++ b/cm-mlops/script/dump-pip-freeze/customize.py @@ -0,0 +1,48 @@ +from cmind import utils +import os + +def preprocess(i): + + os_info = i['os_info'] + + env = i['env'] + + meta = i['meta'] + + automation = i['automation'] + + quiet = (env.get('CM_QUIET', False) == 'yes') + + return {'return':0} + +def postprocess(i): + + env = i['env'] + state = i['state'] + + os_info = i['os_info'] + + automation = i['automation'] + + pip_freeze = {} + if not 
os.path.isfile('tmp-pip-freeze'): + # If was not created, sometimes issues on Windows + # There is another workaround + if os_info['platform'] == 'windows': + r = automation.cmind.access({'action':'system', + 'automation':'utils', + 'cmd':'py -m pip freeze', + 'stdout':'tmp-pip-freeze'}) + # skip output + + if os.path.isfile('tmp-pip-freeze'): + with open("tmp-pip-freeze", "r") as f: + for line in f.readlines(): + if "==" in line: + split = line.split("==") + pip_freeze[split[0]] = split[1].strip() + + + state['pip_freeze'] = pip_freeze + + return {'return':0} diff --git a/cm-mlops/script/dump-pip-freeze/dump.py b/cm-mlops/script/dump-pip-freeze/dump.py new file mode 100644 index 0000000000..d74507ccf6 --- /dev/null +++ b/cm-mlops/script/dump-pip-freeze/dump.py @@ -0,0 +1,19 @@ +import os +from pip._internal.operations import freeze + +if os.path.isfile('tmp-pip-freeze'): + os.remove('tmp-pip-freeze') + +pkgs = freeze.freeze() + +x = '' + +try: + for pkg in pkgs: + x+=pkg+'\n' +except: + pass + +if len(x)>0: + with open('tmp-pip-freeze', "w") as f: + f.write(x) diff --git a/cm-mlops/script/dump-pip-freeze/run.bat b/cm-mlops/script/dump-pip-freeze/run.bat new file mode 100644 index 0000000000..b323ddc22e --- /dev/null +++ b/cm-mlops/script/dump-pip-freeze/run.bat @@ -0,0 +1,4 @@ +if not "%CM_FAKE_RUN%" == "yes" ( + %CM_PYTHON_BIN_WITH_PATH% %CM_TMP_CURRENT_SCRIPT_PATH%\dump.py + IF %ERRORLEVEL% NEQ 0 EXIT %ERRORLEVEL% +) diff --git a/cm-mlops/script/get-mlperf-inference-submission-dir/run.sh b/cm-mlops/script/dump-pip-freeze/run.sh similarity index 76% rename from cm-mlops/script/get-mlperf-inference-submission-dir/run.sh rename to cm-mlops/script/dump-pip-freeze/run.sh index eb5ce24565..a1cdb52eb4 100644 --- a/cm-mlops/script/get-mlperf-inference-submission-dir/run.sh +++ b/cm-mlops/script/dump-pip-freeze/run.sh @@ -25,8 +25,4 @@ function run() { #Add your run commands here... # run "$CM_RUN_CMD" - -scratch_path=${CM_NVIDIA_MLPERF_SCRATCH_PATH} -mkdir -p ${scratch_path}/data -mkdir -p ${scratch_path}/preprocessed_data -mkdir -p ${scratch_path}/models +run "${CM_PYTHON_BIN_WITH_PATH} ${CM_TMP_CURRENT_SCRIPT_PATH}/dump.py" diff --git a/cm-mlops/script/generate-mlperf-inference-submission/customize.py b/cm-mlops/script/generate-mlperf-inference-submission/customize.py index 0db498bb13..ddf4d7656e 100644 --- a/cm-mlops/script/generate-mlperf-inference-submission/customize.py +++ b/cm-mlops/script/generate-mlperf-inference-submission/customize.py @@ -2,6 +2,7 @@ import os import json import shutil +import cmind def preprocess(i): return {'return': 0} @@ -79,7 +80,7 @@ def generate_submission(i): print('* MLPerf inference submitter: {}'.format(submitter)) if 'Collective' not in system_meta.get('sw_notes'): - system_meta['sw_notes'] = "Powered by MLCommons Collective Mind framework (CK2). " + system_meta['sw_notes'] + system_meta['sw_notes'] = "Automated by MLCommons CM v{}. 
".format(cmind.__version__) + system_meta['sw_notes'] if env.get('CM_MLPERF_SUT_SW_NOTES_EXTRA','') != '': sw_notes = f"{system_meta['sw_notes']} {env['CM_MLPERF_SUT_SW_NOTES_EXTRA']}" @@ -171,6 +172,7 @@ def generate_submission(i): scenarios.append("multistream") modes = [f for f in os.listdir(result_scenario_path) if not os.path.isfile(os.path.join(result_scenario_path, f))] + power_run = False for mode in modes: result_mode_path = os.path.join(result_scenario_path, mode) submission_mode_path = os.path.join(submission_scenario_path, mode) @@ -187,7 +189,6 @@ def generate_submission(i): os.makedirs(submission_measurement_path) if mode=='performance': - power_run = False if os.path.exists(os.path.join(result_mode_path, "power")): power_run = True @@ -261,9 +262,11 @@ def generate_submission(i): files.append(f) elif f == "spl.txt": files.append(f) - elif f == "README.md": - readme = True + elif f in [ "README.md", "README-extra.md", "cm-version-info.json", "os_info.json", "cpu_info.json", "pip_freeze.json" ] and mode == "performance": shutil.copy(os.path.join(result_mode_path, f), os.path.join(submission_measurement_path, f)) + elif f in [ "console.out" ]: + shutil.copy(os.path.join(result_mode_path, f), os.path.join(submission_measurement_path, mode+"_"+f)) + if mode == "accuracy": if os.path.exists(os.path.join(result_mode_path, "accuracy.txt")): @@ -276,12 +279,122 @@ def generate_submission(i): p_target = os.path.join(submission_results_path, f) shutil.copy(os.path.join(result_mode_path, f), p_target) - if not readme and mode in [ "performance", "accuracy" ]: - with open(os.path.join(submission_measurement_path, "README.md"), mode='w') as f: - f.write("TBD") #create an empty README + readme_file = os.path.join(submission_measurement_path, "README.md") + if not os.path.exists(readme_file): + with open(readme_file, mode='w') as f: + f.write("TBD") #create an empty README + else: + readme_suffix = "" + result_string = get_result_string(env['CM_MLPERF_LAST_RELEASE'], model, scenario, result_scenario_path, power_run) + with open(readme_file, mode='a') as f: + f.write(result_string) + return {'return':0} + +def get_accuracy_metric(config, model, path): + + import submission_checker as checker + import re + is_valid = False + all_accuracy_valid = True + acc = None + result_acc = None + target = config.get_accuracy_target(model) + acc_upper_limit = config.get_accuracy_upper_limit(model) + patterns = [] + acc_targets = [] + acc_limits = [] + up_patterns = [] + acc_types = [] + + if acc_upper_limit is not None: + acc_limit_check = True + for i in range(0, len(acc_upper_limit), 2): + acc_type, acc_target = acc_upper_limit[i:i+2] + acc_limits.append(acc_target) + up_patterns.append(checker.ACC_PATTERN[acc_type]) + + for i in range(0, len(target), 2): + acc_type, acc_target = target[i:i+2] + acc_types.append(acc_type) + patterns.append(checker.ACC_PATTERN[acc_type]) + acc_targets.append(acc_target) + + acc_seen = [False for _ in acc_targets] + acc_results = {} + with open(os.path.join(path, "accuracy.txt"), "r", encoding="utf-8") as f: + for line in f: + for i, (pattern, acc_target, acc_type) in enumerate(zip(patterns, acc_targets, acc_types)): + m = re.match(pattern, line) + if m: + acc = m.group(1) + + acc_results[acc_type] = acc + + if acc is not None and float(acc) >= acc_target: + all_accuracy_valid &= True + acc_seen[i] = True + elif acc is not None: + all_accuracy_valid = False + #log.warning("%s accuracy not met: expected=%f, found=%s", path, acc_target, acc) + if i == 0 and acc: + 
result_acc = acc + acc = None + if acc_upper_limit is not None: + for i, (pattern, acc_limit) in enumerate(zip(up_patterns, acc_limits)): + m = re.match(pattern, line) + if m: + acc = m.group(1) + if acc is not None and acc_upper_limit is not None and float(acc) > acc_limit: + acc_limit_check = False + #log.warning("%s accuracy not met: upper limit=%f, found=%s", path, acc_limit, acc) + acc = None + if all(acc_seen): + break; + is_valid = all_accuracy_valid & all(acc_seen) + if acc_upper_limit is not None: + is_valid &= acc_limit_check + + + return acc_results, acc_targets, acc_limits, up_patterns + + +def get_result_string(version, model, scenario, result_path, has_power): + import submission_checker as checker + config = checker.Config( + version, + None, + ignore_uncommited=False, + skip_power_check=False, + ) + mlperf_model = config.get_mlperf_model(model) + performance_path = os.path.join(result_path, "performance", "run_1") + accuracy_path = os.path.join(result_path, "accuracy") + performance_result = checker.get_performance_metric(config, mlperf_model, performance_path, scenario, None, None, has_power) + if has_power: + is_valid, power_metric, scenario, avg_power_efficiency = checker.get_power_metric(config, scenario, performance_path, True, performance_result) + power_result_string = power_metric + acc_results, acc_targets, acc_limits, up_patterns = get_accuracy_metric(config, mlperf_model, accuracy_path) + + result_field = checker.RESULT_FIELD[checker.SCENARIO_MAPPING[scenario]] + + performance_result_string = f"{result_field}: {performance_result}\n" + accuracy_result_string = '' + for i, acc in enumerate(acc_results): + accuracy_result_string += f"{acc}: {acc_results[acc]}" + if not up_patterns: + accuracy_result_string += f", Required accuracy for closed division >= {round(acc_targets[i], 5)}" + + result_string = "\n\n## Results \n" + result_string += "### Accuracy Results \n" + accuracy_result_string + result_string += "\n\n### Performance Results \n" + performance_result_string + if has_power: + result_string += "\n\n### Power Results \n" + power_result_string + + return result_string + def postprocess(i): r = generate_submission(i) diff --git a/cm-mlops/script/generate-mlperf-inference-user-conf/_cm.yaml b/cm-mlops/script/generate-mlperf-inference-user-conf/_cm.yaml index dedc119fbe..9d19ad8fc9 100644 --- a/cm-mlops/script/generate-mlperf-inference-user-conf/_cm.yaml +++ b/cm-mlops/script/generate-mlperf-inference-user-conf/_cm.yaml @@ -50,6 +50,7 @@ input_mapping: # Env keys which are exposed to higher level scripts new_env_keys: - CM_MLPERF_* + - CM_LOGS_DIR - CM_HW_* - CM_SUT_* - CM_MAX_EXAMPLES @@ -72,7 +73,11 @@ deps: - python - python3 - + - tags: get,mlperf,results,dir + names: + - get-mlperf-results-dir + skip_if_env: + OUTPUT_BASE_DIR: [ on ] ######################################################################## # Install MLPerf inference dependencies diff --git a/cm-mlops/script/generate-mlperf-inference-user-conf/customize.py b/cm-mlops/script/generate-mlperf-inference-user-conf/customize.py index 6f1b8b5b39..38fca24330 100644 --- a/cm-mlops/script/generate-mlperf-inference-user-conf/customize.py +++ b/cm-mlops/script/generate-mlperf-inference-user-conf/customize.py @@ -35,8 +35,8 @@ def preprocess(i): env['CM_MLPERF_LOADGEN_MODE'] = "accuracy" - if 'OUTPUT_BASE_DIR' not in env: - env['OUTPUT_BASE_DIR'] = os.getcwd() + if env.get('OUTPUT_BASE_DIR', '') == '': + env['OUTPUT_BASE_DIR'] = env.get('CM_MLPERF_INFERENCE_RESULTS_DIR', os.getcwd()) if 'CM_NUM_THREADS' 
not in env: if 'CM_MINIMIZE_THREADS' in env: @@ -216,6 +216,7 @@ def preprocess(i): env['CM_MLPERF_INFERENCE_AUDIT_PATH'] = audit_full_path env['CM_MLPERF_OUTPUT_DIR'] = OUTPUT_DIR + env['CM_LOGS_DIR'] = OUTPUT_DIR env['CM_MLPERF_LOADGEN_LOGS_DIR'] = OUTPUT_DIR run_exists = run_files_exist(log_mode, OUTPUT_DIR, required_files, env) diff --git a/cm-mlops/script/get-dataset-openimages/run.bat b/cm-mlops/script/get-dataset-openimages/run.bat index 3b1b6a15e8..742542d251 100644 --- a/cm-mlops/script/get-dataset-openimages/run.bat +++ b/cm-mlops/script/get-dataset-openimages/run.bat @@ -18,5 +18,7 @@ if not "%CM_DATASET_SIZE%" == "" ( %CM_PYTHON_BIN% tools\openimages.py %MAX_IMAGES% --dataset-dir=%INSTALL_DIR% --output-labels=openimages-mlperf.json --classes %CM_DATASET_OPENIMAGES_CLASSES% IF %ERRORLEVEL% NEQ 0 EXIT %ERRORLEVEL% +rem Next is a hack to support MLPerf inference on Windows cd %INSTALL_DIR% -move validation\data\* . \ No newline at end of file +if not exist validation\data\annotations mkdir validation\data\annotations +copy annotations\* validation\data\annotations diff --git a/cm-mlops/script/get-git-repo/customize.py b/cm-mlops/script/get-git-repo/customize.py index 044b884740..4b9d7863b7 100644 --- a/cm-mlops/script/get-git-repo/customize.py +++ b/cm-mlops/script/get-git-repo/customize.py @@ -59,6 +59,11 @@ def postprocess(i): env['CM_GET_DEPENDENT_CACHED_PATH'] = git_checkout_path + if os.path.exists("tmp-cm-git-hash.out"): + with open("tmp-cm-git-hash.out", "r") as f: + git_hash = f.readline().strip() + env['CM_GIT_REPO_CURRENT_HASH'] = git_hash + return {'return':0} def get_env_key(env): diff --git a/cm-mlops/script/get-git-repo/run.sh b/cm-mlops/script/get-git-repo/run.sh index 60fc0336b8..cf3c125d39 100644 --- a/cm-mlops/script/get-git-repo/run.sh +++ b/cm-mlops/script/get-git-repo/run.sh @@ -11,7 +11,7 @@ if [ ! -d "${CM_TMP_GIT_PATH}" ]; then echo "${CM_GIT_CLONE_CMD}"; ${CM_GIT_CLONE_CMD} - if [ "${?}" != "0" ]; then exit 1; fi + if [ "${?}" != "0" ]; then exit $?; fi cd ${folder} @@ -21,10 +21,9 @@ if [ ! -d "${CM_TMP_GIT_PATH}" ]; then cmd="git checkout -b ${CM_GIT_SHA} ${CM_GIT_SHA}" echo "$cmd" eval "$cmd" - fi - if [ "${?}" != "0" ]; then exit 1; fi + if [ "${?}" != "0" ]; then exit $?; fi - if [ ! -z ${CM_GIT_CHECKOUT_TAG} ]; then + elif [ ! -z ${CM_GIT_CHECKOUT_TAG} ]; then echo "" cmd="git fetch --all --tags" @@ -33,8 +32,14 @@ if [ ! 
-d "${CM_TMP_GIT_PATH}" ]; then cmd="git checkout tags/${CM_GIT_CHECKOUT_TAG} -b ${CM_GIT_CHECKOUT_TAG}" echo "$cmd" eval "$cmd" + if [ "${?}" != "0" ]; then exit $?; fi + + else + cmd="git rev-parse HEAD >> ../tmp-cm-git-hash.out" + echo "$cmd" + eval "$cmd" + if [ "${?}" != "0" ]; then exit $?; fi fi - if [ "${?}" != "0" ]; then exit 1; fi else cd ${folder} @@ -47,7 +52,7 @@ for submodule in "${submodules[@]}" do echo "Initializing submodule ${submodule}" git submodule update --init "${submodule}" - if [ "${?}" != "0" ]; then exit 1; fi + if [ "${?}" != "0" ]; then exit $?; fi done if [ ${CM_GIT_PATCH} == "yes" ]; then @@ -56,7 +61,7 @@ if [ ${CM_GIT_PATCH} == "yes" ]; then do echo "Applying patch $patch_file" git apply "$patch_file" - if [ "${?}" != "0" ]; then exit 1; fi + if [ "${?}" != "0" ]; then exit $?; fi done fi cd "$CUR_DIR" diff --git a/cm-mlops/script/get-ml-model-gptj/_cm.json b/cm-mlops/script/get-ml-model-gptj/_cm.json index ef23620480..57a0b40615 100644 --- a/cm-mlops/script/get-ml-model-gptj/_cm.json +++ b/cm-mlops/script/get-ml-model-gptj/_cm.json @@ -181,7 +181,6 @@ }, "wget": { "group": "download-tool", - "default": true, "add_deps_recursive": { "dae": { "tags": "_wget" @@ -194,6 +193,7 @@ }, "rclone": { "group": "download-tool", + "default": true, "add_deps_recursive": { "dae": { "tags": "_rclone" diff --git a/cm-mlops/script/get-mlperf-inference-results-dir/run.bat b/cm-mlops/script/get-mlperf-inference-results-dir/run.bat deleted file mode 100644 index 648302ca71..0000000000 --- a/cm-mlops/script/get-mlperf-inference-results-dir/run.bat +++ /dev/null @@ -1 +0,0 @@ -rem native script diff --git a/cm-mlops/script/get-mlperf-inference-results-dir/run.sh b/cm-mlops/script/get-mlperf-inference-results-dir/run.sh deleted file mode 100644 index eb5ce24565..0000000000 --- a/cm-mlops/script/get-mlperf-inference-results-dir/run.sh +++ /dev/null @@ -1,32 +0,0 @@ -#!/bin/bash - -#CM Script location: ${CM_TMP_CURRENT_SCRIPT_PATH} - -#To export any variable -#echo "VARIABLE_NAME=VARIABLE_VALUE" >>tmp-run-env.out - -#${CM_PYTHON_BIN_WITH_PATH} contains the path to python binary if "get,python" is added as a dependency - - - -function exit_if_error() { - test $? -eq 0 || exit $? -} - -function run() { - echo "Running: " - echo "$1" - echo "" - if [[ ${CM_FAKE_RUN} != 'yes' ]]; then - eval "$1" - exit_if_error - fi -} - -#Add your run commands here... 
-# run "$CM_RUN_CMD" - -scratch_path=${CM_NVIDIA_MLPERF_SCRATCH_PATH} -mkdir -p ${scratch_path}/data -mkdir -p ${scratch_path}/preprocessed_data -mkdir -p ${scratch_path}/models diff --git a/cm-mlops/script/get-mlperf-inference-src/customize.py b/cm-mlops/script/get-mlperf-inference-src/customize.py index 80e28002da..8df8c3ed88 100644 --- a/cm-mlops/script/get-mlperf-inference-src/customize.py +++ b/cm-mlops/script/get-mlperf-inference-src/customize.py @@ -79,6 +79,9 @@ def postprocess(i): state['CM_MLPERF_INFERENCE_MODELS'] = valid_models + if env.get('CM_GIT_REPO_CURRENT_HASH', '') != '': + env['CM_VERSION'] += "-git-"+env['CM_GIT_REPO_CURRENT_HASH'] + return {'return':0} diff --git a/cm-mlops/script/get-mlperf-inference-submission-dir/run.bat b/cm-mlops/script/get-mlperf-inference-submission-dir/run.bat deleted file mode 100644 index 648302ca71..0000000000 --- a/cm-mlops/script/get-mlperf-inference-submission-dir/run.bat +++ /dev/null @@ -1 +0,0 @@ -rem native script diff --git a/cm-mlops/script/get-mlperf-inference-sut-description/hardware/default.json b/cm-mlops/script/get-mlperf-inference-sut-description/hardware/default.json index 7cc293c1d0..b7cf960dbb 100644 --- a/cm-mlops/script/get-mlperf-inference-sut-description/hardware/default.json +++ b/cm-mlops/script/get-mlperf-inference-sut-description/hardware/default.json @@ -20,7 +20,7 @@ "number_of_nodes": "1", "status": "available", "submitter": "cTuning", - "sw_notes": "Powered by MLCommons CM (CK2)", + "sw_notes": "Automated by MLCommons CM", "system_type": "edge", "system_type_detail": "edge server" } diff --git a/cm-mlops/script/get-mlperf-power-dev/customize.py b/cm-mlops/script/get-mlperf-power-dev/customize.py index a5d4a2ebcb..2af085d740 100644 --- a/cm-mlops/script/get-mlperf-power-dev/customize.py +++ b/cm-mlops/script/get-mlperf-power-dev/customize.py @@ -11,5 +11,11 @@ def preprocess(i): def postprocess(i): + env = i['env'] + if env.get('CM_VERSION', '') == '': + env['CM_VERSION'] = "master" + + if env.get('CM_GIT_REPO_CURRENT_HASH', '') != '': + env['CM_VERSION'] += "-git-"+env['CM_GIT_REPO_CURRENT_HASH'] return {'return':0} diff --git a/cm-mlops/script/get-preprocessed-dataset-openimages/preprocess.py b/cm-mlops/script/get-preprocessed-dataset-openimages/preprocess.py index 71f7554b32..b2b05fe1dc 100644 --- a/cm-mlops/script/get-preprocessed-dataset-openimages/preprocess.py +++ b/cm-mlops/script/get-preprocessed-dataset-openimages/preprocess.py @@ -1,6 +1,7 @@ import os import sys import os.path + mlperf_src_path = os.environ['CM_MLPERF_INFERENCE_CLASSIFICATION_AND_DETECTION_PATH'] python_path = os.path.join(mlperf_src_path, "python") sys.path.insert(0, python_path) diff --git a/cm-mlops/script/get-preprocessed-dataset-openimages/run.bat b/cm-mlops/script/get-preprocessed-dataset-openimages/run.bat new file mode 100644 index 0000000000..f3ccd2da7b --- /dev/null +++ b/cm-mlops/script/get-preprocessed-dataset-openimages/run.bat @@ -0,0 +1 @@ +%CM_PYTHON_BIN_WITH_PATH% %CM_TMP_CURRENT_SCRIPT_PATH%\preprocess.py diff --git a/cm-mlops/script/gui/playground_howtorun.py b/cm-mlops/script/gui/playground_howtorun.py index db1d927163..0c57b02549 100644 --- a/cm-mlops/script/gui/playground_howtorun.py +++ b/cm-mlops/script/gui/playground_howtorun.py @@ -8,7 +8,7 @@ import streamlit as st -announcement = 'Under development - please follow the [MLCommons Collective Mind automation project](https://github.com/mlcommons/ck) and stay tuned for more details ...' 
+announcement = 'Under development - please get in touch via [Discord](https://discord.gg/JjWNWXKxwT) for more details ...' initialized = False external_module_path = '' @@ -56,6 +56,7 @@ def page(st, params, action = ''): ii = {'streamlit_module': st, 'params': params, 'meta': external_module_meta, - 'skip_title': True} + 'skip_title': True, + 'misc_module': misc} return cmind.utils.call_internal_module(None, external_module_path , 'customize', 'gui', ii) diff --git a/cm-mlops/script/install-llvm-src/_cm.json b/cm-mlops/script/install-llvm-src/_cm.json index 7e02fa00ab..d55eccff47 100644 --- a/cm-mlops/script/install-llvm-src/_cm.json +++ b/cm-mlops/script/install-llvm-src/_cm.json @@ -296,7 +296,9 @@ ], "env": { "CM_LLVM_CONDA_ENV": "yes", - "CM_LLVM_16_INTEL_MLPERF_INFERENCE": "yes" + "CM_LLVM_16_INTEL_MLPERF_INFERENCE": "yes", + "USE_CUDA": "0", + "CUDA_VISIBLE_DEVICES": "" } } }, diff --git a/cm-mlops/script/install-pytorch-from-src/_cm.json b/cm-mlops/script/install-pytorch-from-src/_cm.json index bf9288b7d9..f22b09fbfa 100644 --- a/cm-mlops/script/install-pytorch-from-src/_cm.json +++ b/cm-mlops/script/install-pytorch-from-src/_cm.json @@ -109,7 +109,8 @@ }, "env": { "CM_CONDA_ENV": "yes", - "CM_MLPERF_INFERENCE_INTEL": "yes" + "CM_MLPERF_INFERENCE_INTEL": "yes", + "USE_CUDA": "0" }, "deps": [ { diff --git a/cm-mlops/script/launch-benchmark/customize.py b/cm-mlops/script/launch-benchmark/customize.py index 8baf9f2cf8..b6f0e9f745 100644 --- a/cm-mlops/script/launch-benchmark/customize.py +++ b/cm-mlops/script/launch-benchmark/customize.py @@ -1,5 +1,9 @@ import cmind import os +import copy + +base_path={} +base_path_meta={} ################################################################################## def preprocess(i): @@ -46,6 +50,7 @@ def load_cfg(i): prune = i.get('prune',{}) prune_key = prune.get('key', '') + prune_key_uid = prune.get('key_uid', '') prune_uid = prune.get('uid', '') prune_list = prune.get('list',[]) @@ -59,11 +64,21 @@ def load_cfg(i): meta['full_path']=full_path - selection.append(meta) + add = True + + if prune_key!='' and prune_key_uid!='': + if prune_key_uid not in meta.get(prune_key, []): + add = False + + if add: + selection.append(meta) else: for l in lst: path = l.path + main_meta = l.meta + all_tags = main_meta.get('tags',[]) + files = os.listdir(path) for f in files: @@ -83,40 +98,306 @@ def load_cfg(i): else: meta = r['meta'] + # Check base + r = process_base(meta, full_path) + if r['return']>0: return r + meta = r['meta'] + uid = meta['uid'] # Check pruning add = True - if prune_uid!='' and uid != prune_uid: - add = False - - if add and prune_key!='' and len(prune_list)>0 and uid not in prune_list: - add = False + if len(prune)>0: + if prune_uid!='' and uid != prune_uid: + add = False + + if add and len(prune_list)>0 and uid not in prune_list: + add = False + + if add and prune_key!='' and prune_key_uid!='' and prune_key_uid != meta.get(prune_key, None): + add = False if add: meta['full_path']=full_path + add_all_tags = copy.deepcopy(all_tags) + name = meta.get('name','') if name=='': name = ' '.join(meta.get('tags',[])) name = name.strip() meta['name'] = name - + + file_tags = meta.get('tags', '').strip() + if file_tags=='': + if name!='': + add_all_tags += [v.lower() for v in name.split(' ')] + else: + add_all_tags += file_tags.split(',') + + meta['all_tags']=add_all_tags + + meta['main_meta']=main_meta + selection.append(meta) return {'return':0, 'lst':lst, 'selection':selection} 
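The `process_base` helper introduced just below implements single-level inheritance for cfg files: a meta may name a `_base` file (optionally qualified by a cfg artifact, as in `artifact:file.yaml`) whose keys are merged underneath its own. A condensed sketch of just those merge rules, using hypothetical metas rather than real repository entries:

```python
# Sketch of the _base merge rules implemented by process_base below:
# - keys missing from the base are copied from the child meta;
# - string values overwrite the base, except 'tags' and 'name', which concatenate;
# - list values are appended; dict values are merged key by key.

def merge_with_base(base, child):
    merged = dict(base)
    for k, v in child.items():
        if k == '_base':
            continue                    # the reference itself is not merged
        if k not in merged:
            merged[k] = v
        elif isinstance(v, str):
            merged[k] = merged[k] + v if k in ('tags', 'name') else v
        elif isinstance(v, list):
            merged[k] = merged[k] + v
        elif isinstance(v, dict):
            merged[k] = {**merged[k], **v}
    return merged

base  = {'name': 'MLPerf inference', 'tags': 'benchmark,run', 'timeout': 60}
child = {'_base': 'base-cfg:common.yaml', 'name': ' (GPT-J)', 'timeout': 120}

print(merge_with_base(base, child))
# -> {'name': 'MLPerf inference (GPT-J)', 'tags': 'benchmark,run', 'timeout': 120}
```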
+################################################################################## +def process_base(meta, full_path): + + global base_path, base_path_meta + + _base = meta.get('_base', '') + if _base != '': + name = '' + + filename = _base + full_path_base = os.path.dirname(full_path) + + if not filename.endswith('.yaml') and not filename.endswith('.json'): + return {'return':1, 'error':'_base file {} in {} must be .yaml or .json'.format(filename, full_path)} + + if ':' in _base: + x = _base.split(':') + name = x[0] + + full_path_base = base_path.get(name, '') + if full_path_base == '': + + # Find artifact + r = cmind.access({'action':'find', + 'automation':'cfg', + 'artifact':name}) + if r['return']>0: return r + + lst = r['list'] + + if len(lst)==0: + return {'return':1, 'error':'_base artifact {} not found in {}'.format(name, full_path)} + + full_path_base = lst[0].path + + base_path[name] = full_path_base + + filename = x[1] + + # Load base + path = os.path.join(full_path_base, filename) + + if not os.path.isfile(path): + return {'return':1, 'error':'_base file {} not found in {}'.format(filename, full_path)} + + if path in base_path_meta: + base = copy.deepcopy(base_path_meta[path]) + else: + path_without_ext = path[:-5] + + r = cmind.utils.load_yaml_and_json(path_without_ext) + if r['return']>0: return r + + base = r['meta'] + + base_path_meta[path]=copy.deepcopy(base) + + for k in meta: + v = meta[k] + + if k not in base: + base[k]=v + else: + if isinstance(v, str): + # Only merge a few special keys and overwrite the rest + if k in ['tags','name']: + base[k] += meta[k] + else: + base[k] = meta[k] + + elif type(v) == list: + for vv in v: + base[k].append(vv) + elif type(v) == dict: + base[k].update(v) + + meta = base + + return {'return':0, 'meta':meta} + + + + +################################################################################## +def get_with_complex_key(meta, key): + + j = key.find('.') + + if j<0: + return meta.get(key) + + key0 = key[:j] + + if key0 not in meta: + return None + + return get_with_complex_key(meta[key0], key[j+1:]) + +################################################################################## +def get_with_complex_key_safe(meta, key): + v = get_with_complex_key(meta, key) + + if v is None: v='' + + return v + +################################################################################## +def prepare_table(i): + + import pandas as pd + import numpy as np + + selection = i['selection'] + misc = i['misc_module'] + + html = '' + + all_data = [] + +# dimensions = [('input.model', 'MLPerf model'), +# ('input.implementation', 'MLPerf implementation'), +# ('input.framework', 'MLPerf framework')] + + dimensions = i.get('dimensions', []) + + dimension_values = {} + dimension_keys = [] + + if len(dimensions) == 0: + keys = [('test', 'CM test', 400, 'leftAligned')] + else: + keys = [('test', 'CM test', 50, 'leftAligned')] + + for k in dimensions: + key = k[0] + + keys.append((k[0], k[1], 100, 'leftAligned')) + + dimension_values[key] = [] + dimension_keys.append(key) + +# # assemble all values +# for s in selection: +# for k in dimensions: +# key = k[0] +# +# value = get_with_complex_key(selection, key) +# +# if value!=None and value!='' and value not in dimension_values[key]: +# dimension_values.append(value) + + # If dimensions, sort by dimensions + for d in list(reversed(dimension_keys)): + selection = sorted(selection, key = lambda x: get_with_complex_key_safe(x, d)) + + + + keys += [ + ('functional', 
'Functional', 80, ''), + ('reproduced', 'Reproduced', 80, ''), + ('notes', 'Notes', 200, 'leftAligned'), + ] + + j = 0 + + badges_url={'functional':'https://cTuning.org/images/artifacts_evaluated_functional_v1_1_small.png', + 'reproduced':'https://cTuning.org/images/results_reproduced_v1_1_small.png'} + + + + + + + for s in selection: + row = {} + + j += 1 + + uid = s['uid'] + + url = misc.make_url(uid, key='uid', action='howtorun', md=False) + + name = s.get('name','') + if name == '': name = uid + + + if len(dimensions) == 0: + row['test'] = '<a href="{}">{}</a>'.format(url, name) + else: + row['test'] = '<a href="{}">View</a>'.format(url) + for k in dimensions: + kk = k[0] + + v = get_with_complex_key_safe(s, kk) + + row[kk] = str(v) + + + + + # Check ACM/IEEE functional badge + x = '' + if s.get('functional', False): + x = '
<a href="{}"><img src="{}"></a>'.format(url, badges_url['functional']) + row['functional'] = x + + # Check ACM/IEEE reproduced badge + x = '' + if s.get('reproduced', False): + x = '<a href="{}"><img src="{}"></a>
'.format(url, badges_url['reproduced']) + row['reproduced'] = x + + # Check misc notes + row['notes']=s.get('notes','') + + # Finish row + all_data.append(row) + + + # Visualize table + pd_keys = [v[0] for v in keys] + pd_key_names = [v[1] for v in keys] + + pd_all_data = [] + for row in sorted(all_data, key=lambda row: (row.get('x1',0))): + pd_row=[] + for k in pd_keys: + pd_row.append(row.get(k)) + pd_all_data.append(pd_row) + + df = pd.DataFrame(pd_all_data, columns = pd_key_names) + + df.index+=1 + + return {'return':0, 'df':df} + + + + + ################################################################################## def gui(i): params = i['params'] st = i['streamlit_module'] + misc = i['misc_module'] meta = i['meta'] gui_meta = meta['gui'] skip_header = i.get('skip_title', False) - + + end_html = '' + if not skip_header: # Title title = gui_meta['title'] @@ -130,82 +411,153 @@ def gui(i): x = params.get('uid',['']) if len(x)>0 and x[0]!='': uid = x[0].strip() + bench_uid = '' + x = params.get('bench_uid',['']) + if len(x)>0 and x[0]!='': bench_uid = x[0].strip() + + compute_uid = '' + x = params.get('compute_uid',['']) + if len(x)>0 and x[0]!='': compute_uid = x[0].strip() - # Preparing state - if 'bench_id' not in st.session_state: st.session_state['bench_id']=0 - if 'compute_id' not in st.session_state: st.session_state['compute_id']=0 - ############################################################## # Check the first level of benchmarks - bench_id = 0 - - ii = {'tags':'benchmark,run', 'skip_files':True} + ii = {'tags':'benchmark,run', 'skip_files':True, 'prune':{}} if uid != '': ii['skip_files'] = False - ii['prune']={'uid':uid} + ii['prune']['uid']=uid + if bench_uid !='': + ii['artifact']=bench_uid + if compute_uid !='': + ii['prune']['key']='supported_compute' + ii['prune']['key_uid']=compute_uid r=load_cfg(ii) if r['return']>0: return r lst = r['selection'] + + if len(lst)==0: + st.markdown('Warning: no benchmarks found!') + return {'return':0} test_meta = {} + bench_id = 0 + + + ########################################################################################################### + if uid != '': + if len(lst)==0: + st.markdown('CM test with UID "{}" not found!'.format(uid)) + return {'return':0} + elif len(lst)>1: + st.markdown('Warning: More than 1 CM test found with UID "{}" - ambiguity!'.format(uid)) + return {'return':0} + + test_meta = lst[0] + + bench_id = 1 + compute_uid = test_meta['compute_uid'] + bench_supported_compute = [compute_uid] + + if uid == '': selection = sorted(lst, key = lambda v: v['name']) bench_selection = [{'name':''}] + selection + if bench_uid !='': + bench_id_index = 1 + else: + # Check if want to force some benchmark by default + # 27c06c35bceb4059 == MLPerf inference v4.0 + + bench_id_index = 0 + + j=0 + for b in bench_selection: + if b.get('uid','')=='27c06c35bceb4059': + bench_id_index=j + break + j+=1 + + bench_id = st.selectbox('Select benchmark:', range(len(bench_selection)), format_func=lambda x: bench_selection[x]['name'], - index = 0, + index = bench_id_index, key = 'bench') bench_supported_compute = [] bench_meta = {} - if bench_id != st.session_state['bench_id']: + if bench_id>0: bench_meta = bench_selection[bench_id] bench_supported_compute = bench_meta.get('supported_compute',[]) - urls = bench_meta.get('urls',[]) - if len(urls)>0: - x = '\n' - for u in urls: - name = u['name'] - url = u['url'] - - x+=' [ [{}]({}) ] '.format(name, url) - x+='\n' - st.markdown(x) + urls = bench_meta.get('urls',[]) + if len(urls)>0: + x = '\n' + 
for u in urls: + name = u['name'] + url = u['url'] + x+=' [ [{}]({}) ] '.format(name, url) + x+='\n' + st.markdown(x) + + ########################################################################################################### + if True==True: ############################################################## # Check compute - r=load_cfg({'tags':'benchmark,compute', - 'prune':{'key':'supported_compute', 'list':bench_supported_compute}}) - if r['return']>0: return r - selection = sorted(r['selection'], key = lambda v: v['name']) - compute_selection = [{'name':''}] + selection + ii = {'tags':'benchmark,compute'} + if bench_id>0: + if compute_uid !='': + x = [compute_uid] + else: + x = bench_supported_compute + if len(x) == 0: + st.markdown('Warning: no supported compute selected!') + return {'return':0} + + ii['prune']={'list':x} + r=load_cfg(ii) + if r['return']>0: return r - # Creating compute selector - compute_id = st.selectbox('Select target hardware:', - range(len(compute_selection)), - format_func=lambda x: compute_selection[x]['name'], - index = 0, - key = 'compute') + selection = sorted(r['selection'], key = lambda v: v['name']) - if compute_id!=st.session_state['compute_id']: - st.session_state['compute_id']=compute_id + if len(selection) == 0 : + st.markdown('Warning: no supported compute found!') + return {'return':0} + + compute_selection = [{'name':''}] + if len(selection)>0: + compute_selection += selection - try: - st.rerun() - except: - st.experimental_rerun() + compute_id_index = 0 if compute_uid == '' else 1 + + if uid == '': + compute_id = st.selectbox('Select target hardware:', + range(len(compute_selection)), + format_func=lambda x: compute_selection[x]['name'], + index = compute_id_index, + key = 'compute') + + compute = {} + if compute_id>0: + compute = compute_selection[compute_id] + compute_uid = compute['uid'] + + compute_meta = {} + for c in compute_selection: + if c.get('uid','')!='': + compute_meta[c['uid']]=c + ########################################################################################################### + if uid == '': ############################################################## # Check tests @@ -214,7 +566,8 @@ def gui(i): if bench_id>0: bench_uid = bench_selection[bench_id]['uid'] ii['artifact']=bench_uid - + if compute_uid!='': + ii['prune']={'key':'compute_uid', 'key_uid':compute_uid} r=load_cfg(ii) if r['return']>0: return r @@ -222,45 +575,119 @@ def gui(i): selection = sorted(r['selection'], key = lambda v: v['name']) # Check how many and prune - if len(selection)>1: + if len(selection) == 0: + st.markdown('No CM tests found') + return {'return':0} + + for s in selection: + c_uid = s.get('compute_uid','') + if c_uid!='': + c_tags = compute_meta[c_uid].get('tags','') + if c_tags!='': + s['all_tags']+=c_tags.split(',') - test_tags = st.text_input('Found {} CM tests. Prune them by tags:'.format(str(len(selection))), value='', key='test_tags').strip() + s['compute_meta']=compute_meta[c_uid] + + if len(selection)>1: + # Update selection with compute tags + test_tags = '' + x = params.get('tags',['']) + if len(x)>0 and x[0]!='': test_tags = x[0].strip() + test_tags = st.text_input('Found {} CM tests. 
Prune them by tags:'.format(str(len(selection))), value=test_tags, key='test_tags').strip() + if test_tags!='': + test_tags_list = test_tags.replace(' ',',').split(',') + pruned_selection = [] + + for s in selection: + all_tags = s['all_tags'] + + add = True + + for t in test_tags_list: + if t not in all_tags: + add = False + break + + if add: + pruned_selection.append(s) - + selection = pruned_selection + test_selection = [{'name':''}] + selection - # Creating compute selector - test_id = st.selectbox('Select test:', - range(len(test_selection)), - format_func=lambda x: test_selection[x]['name'], - index = 0, - key = 'test') - + if len(selection)<200: + # Creating compute selector + test_id_index = 1 if len(selection)==1 else 0 + + test_id = st.selectbox('Select a test from {}:'.format(str(len(selection))), + range(len(test_selection)), + format_func=lambda x: test_selection[x]['name'], + index = test_id_index, + key = 'test') + + + if test_id >0: + test_meta = test_selection[test_id] + else: + ######################################################################### + # View many (table) + ii = {'selection':selection, + 'misc_module':misc} + + # Check if dimensions in the bench + dimensions = bench_meta.get('dimensions', []) + if len(dimensions)>0: + viewer_selection = ['benchmark specific', 'universal'] + + viewer = st.selectbox('Viewer:', viewer_selection, key = 'viewer') + + if viewer == 'benchmark specific': + ii['dimensions'] = dimensions + + else: + st.markdown('---') + + r = prepare_table(ii) + if r['return']>0: return r + + df = r['df'] + + html=df.to_html(escape=False, justify='left') + st.write(html, unsafe_allow_html = True) + +# st.dataframe(df, unsafe_allow_html = True) + + + + - if test_id >0: - test_meta = test_selection[test_id] - - else: - if len(lst)==0: - st.markdown('CM test with UID "{}" not found!'.format(uid)) - return {'return':0} - elif len(lst)>1: - st.markdown('Warning: More than 1 CM test found with UID "{}" - ambiguity!'.format(uid)) - return {'return':0} - test_meta = lst[0] ############################################################## + # Show individual test if len(test_meta)>0: - st.markdown('---') - st.markdown(str(test_meta)) + if uid != '': + c_uid = test_meta.get('compute_uid','') + if c_uid!='': + c_tags = compute_meta[c_uid].get('tags','') + if c_tags!='': + test_meta['all_tags']+=c_tags.split(',') + + test_meta['compute_meta']=compute_meta[c_uid] + + if uid == '': + st.markdown('---') + + uid = test_meta['uid'] + + # First, check if there is a README test_path = test_meta['full_path'] test_md = test_meta['full_path'][:-5]+'.md' @@ -271,11 +698,28 @@ def gui(i): s = r['string'] - st.markdown('---') - st.markdown(s) + # Next print some info (for now JSON) + import json + x = """ +--- +**CM test dictionary:** +```json +{} +``` + """.format(json.dumps(test_meta, indent=2)) + st.markdown(x) - return {'return':0} + + + + + # Create self link + # This misc module is in CM "gui" script + x1 = misc.make_url(uid, key='uid', action='howtorun', md=False) + end_html='
<a href="{}">Self link</a>
'.format(x1) + + return {'return':0, 'end_html': end_html} diff --git a/cm-mlops/script/reproduce-mlperf-inference-intel/_cm.yaml b/cm-mlops/script/reproduce-mlperf-inference-intel/_cm.yaml index 82aa963a7e..d7916af24b 100644 --- a/cm-mlops/script/reproduce-mlperf-inference-intel/_cm.yaml +++ b/cm-mlops/script/reproduce-mlperf-inference-intel/_cm.yaml @@ -38,6 +38,8 @@ default_env: env: CM_CALL_MLPERF_RUNNER: 'no' + CUDA_VISIBLE_DEVICES: '' + USE_CUDA: '0' # Map script inputs to environment variables input_mapping: @@ -158,6 +160,9 @@ post_deps: - run_harness tags: benchmark-mlperf + - tags: save,mlperf,inference,state + names: + - save-mlperf-inference-state # Variations to customize dependencies variations: @@ -295,9 +300,11 @@ variations: - tags: get,generic-python-lib,_package.datasets names: - pip-package + - datasets - tags: get,generic-python-lib,_package.accelerate names: - pip-package + - accelerate gptj-99: group: model diff --git a/cm-mlops/script/reproduce-mlperf-inference-intel/build_gptj_harness.sh b/cm-mlops/script/reproduce-mlperf-inference-intel/build_gptj_harness.sh index 2219eed64b..31eae300cb 100644 --- a/cm-mlops/script/reproduce-mlperf-inference-intel/build_gptj_harness.sh +++ b/cm-mlops/script/reproduce-mlperf-inference-intel/build_gptj_harness.sh @@ -21,6 +21,9 @@ mkdir -p ${WORKLOAD_DATA}/model export INT8_MODEL_DIR=${WORKLOAD_DATA}/gpt-j-int8-model export INT4_MODEL_DIR=${WORKLOAD_DATA}/gpt-j-int4-model +if [[ -f ${INT8_MODEL_DIR}/best_model.pt ]]; then + exit 0 +fi python download-calibration-dataset.py --calibration-list-file calibration-list.txt --output-dir ${WORKLOAD_DATA}/calibration-data python download-dataset.py --split validation --output-dir ${WORKLOAD_DATA}/validation-data @@ -30,5 +33,4 @@ export VALIDATION_DATA_JSON=${WORKLOAD_DATA}/validation-data/cnn_dailymail_valid export INT4_CALIBRATION_DIR=${WORKLOAD_DATA}/quantized-int4-model #sudo -E bash run_quantization.sh bash run_quantization.sh -exit 1 test $? -eq 0 || exit $? 
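The launch-benchmark GUI code above resolves nested meta fields such as `input.model` with the recursive `get_with_complex_key` helper. A minimal, self-contained sketch of that dotted-key lookup; the sample meta is hypothetical:

```python
# Dotted-key lookup mirroring get_with_complex_key above:
# 'input.model' walks meta['input']['model']; a missing key yields None.

def lookup(meta, key):
    j = key.find('.')
    if j < 0:
        return meta.get(key)
    head = key[:j]
    if head not in meta:
        return None
    return lookup(meta[head], key[j + 1:])

meta = {'input': {'model': 'gptj-99', 'framework': 'pytorch'}}  # hypothetical meta

print(lookup(meta, 'input.model'))      # gptj-99
print(lookup(meta, 'input.precision'))  # None
```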
diff --git a/cm-mlops/script/reproduce-mlperf-inference-intel/customize.py b/cm-mlops/script/reproduce-mlperf-inference-intel/customize.py index 05279e631e..a924508970 100644 --- a/cm-mlops/script/reproduce-mlperf-inference-intel/customize.py +++ b/cm-mlops/script/reproduce-mlperf-inference-intel/customize.py @@ -92,28 +92,6 @@ def preprocess(i): def postprocess(i): env = i['env'] - if env.get('CM_MLPERF_README', '') == "yes": - import cmind as cm - inp = i['input'] - state = i['state'] - script_tags = inp['tags'] - script_adr = inp.get('add_deps_recursive', inp.get('adr', {})) - - cm_input = {'action': 'run', - 'automation': 'script', - 'tags': script_tags, - 'adr': script_adr, - 'print_deps': True, - 'env': env, - 'quiet': True, - 'silent': True, - 'fake_run': True - } - r = cm.access(cm_input) - if r['return'] > 0: - return r - - state['mlperf-inference-implementation'] = {} - state['mlperf-inference-implementation']['print_deps'] = r['new_state']['print_deps'] + state = i['state'] return {'return':0} diff --git a/cm-mlops/script/reproduce-mlperf-inference-intel/run_gptj_harness.sh b/cm-mlops/script/reproduce-mlperf-inference-intel/run_gptj_harness.sh index 2b2c733476..07585c015b 100755 --- a/cm-mlops/script/reproduce-mlperf-inference-intel/run_gptj_harness.sh +++ b/cm-mlops/script/reproduce-mlperf-inference-intel/run_gptj_harness.sh @@ -1,7 +1,7 @@ #!/bin/bash export PATH=${CM_CONDA_BIN_PATH}:$PATH -export KMP_BLOCKTIME=4 +export KMP_BLOCKTIME=10 export KMP_AFFINITY=granularity=fine,compact,1,0 export LD_PRELOAD=${LD_PRELOAD}:${CONDA_PREFIX}/lib/libiomp5.so export LD_PRELOAD=${LD_PRELOAD}:${CONDA_PREFIX}/lib/libtcmalloc.so diff --git a/cm-mlops/script/reproduce-mlperf-inference-nvidia/_cm.yaml b/cm-mlops/script/reproduce-mlperf-inference-nvidia/_cm.yaml index 28ffbf5f4d..9ebae73d23 100644 --- a/cm-mlops/script/reproduce-mlperf-inference-nvidia/_cm.yaml +++ b/cm-mlops/script/reproduce-mlperf-inference-nvidia/_cm.yaml @@ -291,7 +291,9 @@ post_deps: enable_if_env: CM_CALL_MLPERF_RUNNER: - yes - + - tags: save,mlperf,inference,state + names: + - save-mlperf-inference-state # Variations to customize dependencies variations: diff --git a/cm-mlops/script/reproduce-mlperf-inference-nvidia/customize.py b/cm-mlops/script/reproduce-mlperf-inference-nvidia/customize.py index df0288e8d9..ddc8ca961d 100644 --- a/cm-mlops/script/reproduce-mlperf-inference-nvidia/customize.py +++ b/cm-mlops/script/reproduce-mlperf-inference-nvidia/customize.py @@ -416,29 +416,6 @@ def preprocess(i): def postprocess(i): env = i['env'] - - if env.get('CM_MLPERF_README', '') == "yes": - import cmind as cm - inp = i['input'] - state = i['state'] - script_tags = inp['tags'] - script_adr = inp.get('add_deps_recursive', inp.get('adr', {})) - - cm_input = {'action': 'run', - 'automation': 'script', - 'tags': script_tags, - 'adr': script_adr, - 'env': env, - 'print_deps': True, - 'quiet': True, - 'silent': True, - 'fake_run': True - } - r = cm.access(cm_input) - if r['return'] > 0: - return r - - state['mlperf-inference-implementation'] = {} - state['mlperf-inference-implementation']['print_deps'] = r['new_state']['print_deps'] + state = i['state'] return {'return':0} diff --git a/cm-mlops/script/reproduce-mlperf-inference-qualcomm/_cm.yaml b/cm-mlops/script/reproduce-mlperf-inference-qualcomm/_cm.yaml index 601e4e0dc2..1b1b0b73ea 100644 --- a/cm-mlops/script/reproduce-mlperf-inference-qualcomm/_cm.yaml +++ b/cm-mlops/script/reproduce-mlperf-inference-qualcomm/_cm.yaml @@ -261,6 +261,9 @@ post_deps: - yes tags: 
benchmark-mlperf + - tags: save,mlperf,inference,state + names: + - save-mlperf-inference-state # Variations to customize dependencies variations: diff --git a/cm-mlops/script/reproduce-mlperf-inference-qualcomm/customize.py b/cm-mlops/script/reproduce-mlperf-inference-qualcomm/customize.py index ae7cc0e40a..d24804eb3a 100644 --- a/cm-mlops/script/reproduce-mlperf-inference-qualcomm/customize.py +++ b/cm-mlops/script/reproduce-mlperf-inference-qualcomm/customize.py @@ -183,28 +183,5 @@ def preprocess(i): def postprocess(i): env = i['env'] - if env.get('CM_MLPERF_README', '') == "yes": - import cmind as cm - inp = i['input'] - state = i['state'] - script_tags = inp['tags'] - script_adr = inp.get('add_deps_recursive', inp.get('adr', {})) - - cm_input = {'action': 'run', - 'automation': 'script', - 'tags': script_tags, - 'adr': script_adr, - 'print_deps': True, - 'env': env, - 'quiet': True, - 'silent': True, - 'fake_run': True - } - r = cm.access(cm_input) - if r['return'] > 0: - return r - - state['mlperf-inference-implementation'] = {} - state['mlperf-inference-implementation']['print_deps'] = r['new_state']['print_deps'] return {'return':0} diff --git a/cm-mlops/script/run-mlperf-inference-app/_cm.yaml b/cm-mlops/script/run-mlperf-inference-app/_cm.yaml index 1d1e824849..2d1771243f 100644 --- a/cm-mlops/script/run-mlperf-inference-app/_cm.yaml +++ b/cm-mlops/script/run-mlperf-inference-app/_cm.yaml @@ -44,6 +44,8 @@ input_mapping: debug: CM_DEBUG_SCRIPT_BENCHMARK_PROGRAM device: CM_MLPERF_DEVICE division: CM_MLPERF_SUBMISSION_DIVISION + dump_version_info: CM_DUMP_VERSION_INFO + save_console_log: CM_SAVE_CONSOLE_LOG execution_mode: CM_MLPERF_EXECUTION_MODE find_performance: CM_MLPERF_FIND_PERFORMANCE_MODE gpu_name: CM_NVIDIA_GPU_NAME @@ -101,6 +103,12 @@ deps: tags: get,mlcommons,inference,src - tags: get,sut,description +- tags: get,mlperf,inference,results,dir + names: + - get-mlperf-inference-results-dir + skip_if_env: + OUTPUT_BASE_DIR: [ on ] + docker: fake_run_deps: true mounts: diff --git a/cm-mlops/script/run-mlperf-inference-app/customize.py b/cm-mlops/script/run-mlperf-inference-app/customize.py index 56a6624c44..72f0d11e10 100644 --- a/cm-mlops/script/run-mlperf-inference-app/customize.py +++ b/cm-mlops/script/run-mlperf-inference-app/customize.py @@ -19,6 +19,7 @@ def preprocess(i): if env.get('CM_RUN_DOCKER_CONTAINER', '') == "yes": return {'return':0} + dump_version_info = env.get('CM_DUMP_VERSION_INFO', True) system_meta = state['CM_SUT_META'] env['CM_SUT_META_EXISTS'] = "yes" @@ -91,9 +92,8 @@ def preprocess(i): else: env['CM_MLPERF_LOADGEN_MODES'] = [ env['CM_MLPERF_LOADGEN_MODE'] ] - - if 'OUTPUT_BASE_DIR' not in env: - env['OUTPUT_BASE_DIR'] = os.getcwd() + if env.get('OUTPUT_BASE_DIR', '') == '': + env['OUTPUT_BASE_DIR'] = env.get('CM_MLPERF_INFERENCE_RESULTS_DIR', os.getcwd()) test_list = ["TEST01", "TEST05"] if env['CM_MODEL'] in ["resnet50"]: @@ -138,6 +138,7 @@ def preprocess(i): print ('=========================================================') local_keys = [ 'CM_MLPERF_SKIP_RUN', 'CM_MLPERF_LOADGEN_QUERY_COUNT' ] + for scenario in env['CM_MLPERF_LOADGEN_SCENARIOS']: scenario_tags = tags + ",_"+scenario.lower() env['CM_MLPERF_LOADGEN_SCENARIO'] = scenario @@ -161,7 +162,7 @@ def preprocess(i): print(f"\nRunning loadgen scenario: {scenario} and mode: {mode}") ii = {'action':'run', 'automation':'script', 'tags': scenario_tags, 'quiet': 'true', 'env': env, 'input': inp, 'state': state, 'add_deps': add_deps, 'add_deps_recursive': - copy.deepcopy(add_deps_recursive), 
'ad': ad, 'adr': copy.deepcopy(adr), 'v': verbose, 'print_env': print_env, 'print_deps': print_deps} + copy.deepcopy(add_deps_recursive), 'ad': ad, 'adr': copy.deepcopy(adr), 'v': verbose, 'print_env': print_env, 'print_deps': print_deps, 'dump_version_info': dump_version_info} r = cm.access(ii) if r['return'] > 0: return r @@ -186,7 +187,7 @@ def preprocess(i): env['CM_MLPERF_LOADGEN_MODE'] = "compliance" r = cm.access({'action':'run', 'automation':'script', 'tags': scenario_tags, 'quiet': 'true', 'env': env, 'input': inp, 'state': state, 'add_deps': add_deps, 'add_deps_recursive': - copy.deepcopy(add_deps_recursive), 'adr': copy.deepcopy(adr), 'ad': ad, 'v': verbose, 'print_env': print_env, 'print_deps': print_deps}) + copy.deepcopy(add_deps_recursive), 'adr': copy.deepcopy(adr), 'ad': ad, 'v': verbose, 'print_env': print_env, 'print_deps': print_deps, 'dump_version_info': dump_version_info}) if r['return'] > 0: return r diff --git a/cm-mlops/script/save-mlperf-inference-implementation-state/_cm.yaml b/cm-mlops/script/save-mlperf-inference-implementation-state/_cm.yaml new file mode 100644 index 0000000000..4f1deee8e2 --- /dev/null +++ b/cm-mlops/script/save-mlperf-inference-implementation-state/_cm.yaml @@ -0,0 +1,13 @@ +alias: save-mlperf-inference-implementation-state +automation_alias: script +automation_uid: 5b4e0237da074764 +cache: false +new_state_keys: + - mlperf-inference-implementation +tags: +- save +- mlperf +- inference +- implementation +- state +uid: b14b813229c444f8 diff --git a/cm-mlops/script/save-mlperf-inference-implementation-state/customize.py b/cm-mlops/script/save-mlperf-inference-implementation-state/customize.py new file mode 100644 index 0000000000..be3be96798 --- /dev/null +++ b/cm-mlops/script/save-mlperf-inference-implementation-state/customize.py @@ -0,0 +1,63 @@ +from cmind import utils +import os + +def preprocess(i): + + os_info = i['os_info'] + + env = i['env'] + state = i['state'] + + meta = i['meta'] + + automation = i['automation'] + + quiet = (env.get('CM_QUIET', False) == 'yes') + + if not state.get('mlperf-inference-implementation'): #No state information. 
Just returning + return {'return': 0} + + if env.get('CM_MLPERF_README', "") == "yes": + import cmind as cm + inp = i['input'] + + script_tags = state['mlperf-inference-implementation'].get('script_tags', '') + script_adr = state['mlperf-inference-implementation'].get('script_adr', {}) + + if script_tags != '': + cm_input = {'action': 'run', + 'automation': 'script', + 'tags': script_tags, + 'adr': script_adr, + 'env': env, + 'print_deps': True, + 'quiet': True, + 'silent': True, + 'fake_run': True + } + + r = cm.access(cm_input) + if r['return'] > 0: + return r + + state['mlperf-inference-implementation']['print_deps'] = r['new_state']['print_deps'] + + if env.get('CM_DUMP_VERSION_INFO', True): + + if state['mlperf-inference-implementation'].get('script_id', '') == '': + state['mlperf-inference-implementation']['script_id'] = '' + + script_id = state['mlperf-inference-implementation']['script_id'] + run_state = i['input']['run_state'] + version_info = {} + version_info[script_id] = run_state['version_info'] + + state['mlperf-inference-implementation']['version_info'] = version_info + + return {'return':0} + +def postprocess(i): + + env = i['env'] + + return {'return':0} diff --git a/cm-mlops/script/test-mlperf-inference-retinanet-win/run.bat b/cm-mlops/script/test-mlperf-inference-retinanet-win/run.bat deleted file mode 100644 index 0fd4284329..0000000000 --- a/cm-mlops/script/test-mlperf-inference-retinanet-win/run.bat +++ /dev/null @@ -1,8 +0,0 @@ -echo. - -set CUR_DIR=%cd% -set SCRIPT_DIR=%CM_TMP_CURRENT_SCRIPT_PATH% - -cd %CM_MLPERF_INFERENCE_CLASSIFICATION_AND_DETECTION_PATH% - -%CM_PYTHON_BIN% python/main.py --profile retinanet-onnxruntime --scenario Offline --model %CM_ML_MODEL_FILE_WITH_PATH% --dataset-path %CM_DATASET_PATH_ROOT% --accuracy diff --git a/cm-mlops/script/test-mlperf-inference-retinanet-win/README.md b/cm-mlops/script/test-mlperf-inference-retinanet/README.md similarity index 100% rename from cm-mlops/script/test-mlperf-inference-retinanet-win/README.md rename to cm-mlops/script/test-mlperf-inference-retinanet/README.md diff --git a/cm-mlops/script/test-mlperf-inference-retinanet-win/_cm.json b/cm-mlops/script/test-mlperf-inference-retinanet/_cm.json similarity index 94% rename from cm-mlops/script/test-mlperf-inference-retinanet-win/_cm.json rename to cm-mlops/script/test-mlperf-inference-retinanet/_cm.json index 8d3bb8861d..fb8be75934 100644 --- a/cm-mlops/script/test-mlperf-inference-retinanet-win/_cm.json +++ b/cm-mlops/script/test-mlperf-inference-retinanet/_cm.json @@ -1,5 +1,5 @@ { - "alias": "test-mlperf-inference-retinanet-win", + "alias": "test-mlperf-inference-retinanet", "automation_alias": "script", "automation_uid": "5b4e0237da074764", "category": "CM interface prototyping", diff --git a/cm-mlops/script/test-mlperf-inference-retinanet-win/customize.py b/cm-mlops/script/test-mlperf-inference-retinanet/customize.py similarity index 100% rename from cm-mlops/script/test-mlperf-inference-retinanet-win/customize.py rename to cm-mlops/script/test-mlperf-inference-retinanet/customize.py diff --git a/cm-mlops/script/test-mlperf-inference-retinanet/run.bat b/cm-mlops/script/test-mlperf-inference-retinanet/run.bat new file mode 100644 index 0000000000..38970bc0ef --- /dev/null +++ b/cm-mlops/script/test-mlperf-inference-retinanet/run.bat @@ -0,0 +1,8 @@ +echo. 
+ +set CUR_DIR=%cd% +set SCRIPT_DIR=%CM_TMP_CURRENT_SCRIPT_PATH% + +cd %CM_MLPERF_INFERENCE_CLASSIFICATION_AND_DETECTION_PATH% + +%CM_PYTHON_BIN_WITH_PATH% python/main.py --profile retinanet-onnxruntime --scenario Offline --model %CM_ML_MODEL_FILE_WITH_PATH% --dataset-path %CM_DATASET_PATH_ROOT%\validation\data --accuracy diff --git a/cm-mlops/script/test-mlperf-inference-retinanet/run.sh b/cm-mlops/script/test-mlperf-inference-retinanet/run.sh new file mode 100644 index 0000000000..b437374079 --- /dev/null +++ b/cm-mlops/script/test-mlperf-inference-retinanet/run.sh @@ -0,0 +1,9 @@ +#!/bin/bash + +echo "" + +cd ${CM_MLPERF_INFERENCE_CLASSIFICATION_AND_DETECTION_PATH} + +ln -s ${CM_DATASET_PATH_ROOT}/annotations ${CM_DATASET_PATH_ROOT}/validation/data/annotations + +${CM_PYTHON_BIN_WITH_PATH} python/main.py --profile retinanet-onnxruntime --scenario Offline --model ${CM_ML_MODEL_FILE_WITH_PATH} --dataset-path ${CM_DATASET_PATH_ROOT}/validation/data --accuracy diff --git a/cmr.yaml b/cmr.yaml index 48ae611d66..994aaf98fa 100644 --- a/cmr.yaml +++ b/cmr.yaml @@ -1,5 +1,8 @@ alias: mlcommons@ck +uid: a4705959af8e447a + git: true + prefix: cm-mlops -uid: a4705959af8e447a + version: 1.6.2.1 diff --git a/docs/mlperf/README.md b/docs/mlperf/README.md index ae7fe74753..51d2859931 100644 --- a/docs/mlperf/README.md +++ b/docs/mlperf/README.md @@ -1,17 +1,21 @@ [ [Back to CM documentation](../README.md) ] -# Run MLPerf benchmarks out-of-the-box +# Run and customize MLPerf benchmarks using the MLCommons CM automation framework -This documentation will help you run, reproduce and compare MLPerf benchmarks out-of-the-box -in a unified way across different software, hardware, models and data sets using -the the [MLCommons Collective Mind automation language (CM)](https://doi.org/10.5281/zenodo.8105339). +This documentation explains how to run, customize and extend MLPerf benchmarks +in a unified way across diverse models, data sets, software and hardware from different vendors +using [MLCommons Collective Mind automation recipes](https://access.cknowledge.org/playground/?action=scripts): -Please choose which benchmark you want to run: * [MLPerf inference benchmark](inference/README.md) * [MLPerf training benchmark](../tutorials/reproduce-mlperf-training.md) *(prototyping phase)* * [MLPerf tiny benchmark](../tutorials/reproduce-mlperf-tiny.md) *(prototyping phase)* +* MLPerf automotive *(prototyping phase)* * MLPerf mobile *(preparation phase)* +* MLPerf client *(preparation phase)* -This project is under development by the [MLCommons Task Force on Automation and Reproducibility](../taskforce.md), -[cTuning.org](https://cTuning.org) and [cKnowledge.org](https://cKnowledge.org) - don't hesitate to get in touch -via the [public Discord server](https://discord.gg/JjWNWXKxwT). +*Note that the [MLCommons Task Force on Automation and Reproducibility](../taskforce.md) + is preparing a [GUI](https://access.cknowledge.org/playground/?action=howtorun) + to make it easier to run, customize, reproduce and compare + MLPerf benchmarks - please stay tuned for more details!* + +Don't hesitate to get in touch via the [public Discord server](https://discord.gg/JjWNWXKxwT) if you have questions or feedback! diff --git a/platform/README.md b/platform/README.md index 59cfc9ec3f..0725ac3a5e 100644 --- a/platform/README.md +++ b/platform/README.md @@ -1,49 +1,32 @@ # Collective Knowledge Playground -*Note that this project is under heavy development. 
- We are preparing a major update based on very useful feedback from our users during MLPerf inference 3.1 community submission!* +*This project is under heavy development led by the [MLCommons Task Force on Automation and Reproducibility](../docs/taskforce.md), + [cTuning.org](https://cTuning.org) and [cKnowledge.org](https://cKnowledge.org) - please join the [public Discord server](https://discord.gg/JjWNWXKxwT) to discuss this project!* + + ### Introduction -The [Collective Knowledge Playground (CK)](https://x.cknowledge.org) is a free, open-source, and technology-agnostic on-prem platform -being developed by the [MLCommons task force on automation and reproducibility](https://cKnowledge.org/mlcommons-taskforce). -It is intended to connect academia and industry to benchmark, optimize and compare AI, ML and other emerging applications +The [Collective Knowledge Playground (CK)](https://access.cknowledge.org) is a free, open-source, and technology-agnostic on-prem platform +being developed by the [MLCommons task force on automation and reproducibility](https://cKnowledge.org/mlcommons-taskforce) +to benchmark, optimize and compare AI, ML and other emerging applications across diverse and rapidly evolving models, software, hardware and data from different vendors in terms of costs, performance, power consumption, accuracy, size and other metrics in a unified, collaborative, automated, and reproducible way. -This platform is powered by the portable and technology-agnostic [Collective Mind scripting language (MLCommons CM)]( https://github.com/mlcommons/ck/tree/master/cmind ) -with [portable and reusable CM scripts](https://github.com/mlcommons/ck/tree/master/cm-mlops/script) -developed by the community to solve the "AI/ML dependency hell". CM scripts help to automatically connect -diverse and continuously changing models, software, hardware, data sets, best practices and optimization techniques -into end-to-end applications in a transparent and non-intrusive way. - -We thank [the community](https://access.cknowledge.org/playground/?action=contributors) -for helping us to validate a prototype of the MLCommons CK playground by running and reproducing -[MLPerf inference v3.0 benchmarks](https://access.cknowledge.org/playground/?action=experiments&tags=mlperf-inference,v3.0,community-submission,open,edge,image-classification,singlestream): -CK has helped to automatically interconnect very diverse technology from Neural Magic, Qualcomm, Krai, cKnowledge, OctoML, Deelvin, DELL, HPE, Lenovo, Hugging Face, Nvidia and Apple -and run it across diverse CPUs, GPUs and DSPs with PyTorch, -ONNX, QAIC, TF/TFLite, TVM and TensorRT using popular cloud providers (GCP, AWS, Azure) and individual servers and edge devices -via our recent [open optimization challenge](https://access.cknowledge.org/playground/?action=challenges&name=optimize-mlperf-inference-v3.0-2023). +This platform is powered by the [Collective Mind automation framework (MLCommons CM)](https://github.com/mlcommons/ck) +with [portable, reusable and technology-agnostic automation recipes (CM scripts)](https://access.cknowledge.org/playground/?action=scripts) +developed by the [community](https://access.cknowledge.org/playground/?action=contributors) to solve the "AI/ML dependency hell". 
### Public GUI -* [Platform preview](https://x.cKnowledge.org) +* [Platform preview](https://access.cKnowledge.org) * [GUI to run MLPerf inference benchmarks](http://cknowledge.org/mlperf-inference-gui) * [GUI to prepare MLPerf inference submissions](https://cknowledge.org/mlperf-inference-submission-gui) -### Collaborative development -This open-source technology is being developed by the -[MLCommons task force on automation and reproducibility](https://github.com/mlcommons/ck/blob/master/docs/taskforce.md) -led by [Grigori Fursin](https://cKnowledge.org/gfursin) and -[Arjun Suresh](https://www.linkedin.com/in/arjunsuresh): -* Join our [public Discord server](https://discord.gg/JjWNWXKxwT). -* Join our [public conf-calls](https://docs.google.com/document/d/1zMNK1m_LhWm6jimZK6YE05hu4VH9usdbKJ3nBy-ZPAw). -* Check our [news](docs/news.md). -* Check our [presentation](https://doi.org/10.5281/zenodo.7871070) with development plans. -* Read about our [CK concept (previous version before MLCommons)](https://doi.org/10.1098/rsta.2020.0211). +### Collaborative development #### Source code for on-prem use @@ -55,11 +38,12 @@ using the MLCommons CM scripting language. Discuss your challenge in Discord, add your challenge [here](https://github.com/mlcommons/ck/tree/master/cm-mlops/challenge) and create a PR. + #### Private challenges You can use this platform to organize private challenges between your internal teams and external partners. -Install the MLCommons CK2 (CM) framework as described [here](https://github.com/mlcommons/ck/blob/master/docs/installation.md). +Install the MLCommons CM framework as described [here](https://github.com/mlcommons/ck/blob/master/docs/installation.md). Pull CM repository with portable MLOps automation recipes from the community: ```bash @@ -76,16 +60,6 @@ as a public or private server to run optimization experiments with your colleagues, external teams and users. -### Copyright - -2021-2023 [MLCommons](https://mlcommons.org) - ### License [Apache 2.0](LICENSE.md) - -### Acknowledgments - -This project is currently supported by [MLCommons](https://mlcommons.org), [cTuning foundation](https://cTuning.org), -[cKnowledge](https://cKnowledge.org) and [individual contributors](https://github.com/mlcommons/ck/blob/master/CONTRIBUTING.md). -We thank [HiPEAC](https://hipeac.net) and [OctoML](https://octoml.ai) for sponsoring initial development. diff --git a/platform/get-started.md b/platform/get-started.md deleted file mode 100644 index e02b7a66be..0000000000 --- a/platform/get-started.md +++ /dev/null @@ -1,38 +0,0 @@ -# Getting Started Guide - -## Reproducing and improving MLPerf inference results - -The [Collective Knowledge platform](https://access.cKnowledge.org) -is currently having experiment results from MLPerf Inference v2.0, v2.1 and v3.0 -in the [extensible CM format](https://github.com/mlcommons/ck_mlperf_results) -and with the possibility to add derived metrics such as power efficiency. - -We are currently preparing the [optimization challenge for MLPerf Inference v3.1](https://github.com/ctuning/mlcommons-ck/blob/master/cm-mlops/challenge/optimize-mlperf-inference-v3.1-2023/README.md). - -For MLPerf inference 3.1 we have the following benchmark tasks -1. [Image Classification](https://github.com/mlcommons/ck/blob/master/cm-mlops/challenge/optimize-mlperf-inference-v3.1-2023/docs/generate-resnet50-submission.md) using ResNet50 model and Imagenet-2012 dataset -2. 
[Object Detection](https://github.com/mlcommons/ck/blob/master/cm-mlops/challenge/optimize-mlperf-inference-v3.1-2023/docs/generate-retinanet-submission.md) using Retinanet model and OpenImages dataset -3. [Language processing](https://github.com/mlcommons/ck/blob/master/cm-mlops/challenge/optimize-mlperf-inference-v3.1-2023/docs/generate-bert-submission.md) using Bert-Large model and Squadv1.1 dataset -4. [Speech Recognition](https://github.com/mlcommons/ck/blob/master/cm-mlops/challenge/optimize-mlperf-inference-v3.1-2023/docs/generate-rnnt-submission.md) using RNNT model and LibriSpeech dataset -5. [Medical Imaging](https://github.com/mlcommons/ck/blob/master/cm-mlops/challenge/optimize-mlperf-inference-v3.1-2023/docs/generate-3d-unet-submission.md) using 3d-unet model and KiTS19 dataset -6. Recommendation using DLRM model and Criteo dataset -7. Large Language Model (Pending) - -The tasks are divided into -1. Edge (SingleStream, MultiStream and Offline scenarios) and -2. Datacenter (Offline and Server scenarios) categories. - -Results can be submitted under -1. closed (requires compliance runs, strict accuracy requirement, no retraining and subject to audit) and -2. open divisions (only dataset is fixed). - -Results can be just performance or performance with power. - -## Participating in other optimization challenges - -Check our on-going optimization challenges [here](https://access.cknowledge.org/playground/?action=challenges) -and join our [public Discord server](https://access.cknowledge.org/playground/?action=challenges) to discuss them. - -## Further reading - -* [Project documentation](../docs/README.md) diff --git a/platform/register.md b/platform/register.md index 323e8352e8..4f2a15f4c8 100644 --- a/platform/register.md +++ b/platform/register.md @@ -1,8 +1,10 @@ -# Register for Collective Knowledge challenges +# Register for benchmarking and optimization challenges Please join the [public Discord server](https://discord.gg/JjWNWXKxwT) -from the [MLCommons Task Force on Automation and Reproducibility](../docs/taskforce.md) -and send your name, organization and URL to @gfursin and @arjunsuresh -(task force co-chairs and organizers of open challenges). +to tell the [MLCommons Task Force on Automation and Reproducibility](../docs/taskforce.md) +about your interest to participate in our benchmarking and optimization challenges. + +*We plan to add a registration GUI to the [MLCommons Collective Knowledge playground](https://access.cKnowledge.org) + in the future - please stay tuned for more details!* + -In the future, we plan to add a registration GUI to our [MLCommons Collective Knowledge playground](https://access.cKnowledge.org). diff --git a/platform/register2.md b/platform/register2.md deleted file mode 100644 index 1d9a9fde01..0000000000 --- a/platform/register2.md +++ /dev/null @@ -1,59 +0,0 @@ -# Register for Collective Knowledge challenges - -Since the [MLCommons CK playground](https://access.cKnowledge.org) -is still in the heavy development stage, the registration is not yet automated via CK GUI. - -You can simply add add your name, organization and URL in this [GitHub ticket](https://github.com/mlcommons/ck/issues/855). - -You name will be added to the [CK leaderboard](https://access.cknowledge.org/playground) -with 1 point after your PR is accepted (to support your intent to participate in our collaborative effort). 
- -You can add yourself to this [GitHub repository](https://github.com/mlcommons/ck/tree/master/cm-mlops/contributor) -using our [CM automation language](https://doi.org/10.5281/zenodo.8105339) from the command line as follows. - -Install [CM](../docs/installation.md) on your system. - -Fork https://github.com/mlcommons/ck . - -Pull it via CM as follows: - -```bash -cm pull repo --url={URL of the fork of github.com/mlcommons/ck} -``` - -Note that if you already have `mlcommons@ck` repository installed via CM, -you need to delete it and then install your fork: -```bash -cm rm repo mlcommons@ck --all -cm pull repo --url={URL of the fork of github.com/mlcommons/ck} -``` -Create a new contributor with your name: -```bash -cm add contributor "your name" -``` - -CM will ask you a few questions and will create a new CM contributor entry with your name. - -You can commit this entry to your fork and create a PR to https://github.com/mlcommons/ck . - -*Note that you will need to sign MLCommons CLA to contribute to MLCommons projects - it may take a few days to approve it by MLCommons*. - -Note that you will need CM and your fork of https://github.com/mlcommons/ck to participate in challenges, -so please keep and use it. - -Happy hacking! - -## Discussions - -You can now join the [public Discord server](https://discord.gg/JjWNWXKxwT) -from the [MLCommons Task Force on Automation and Reproducibility](../docs/taskforce.md) -to ask any questions, provide feedback and discuss challenges! - -## Our mission - -You can learn more about our mission [here](https://doi.org/10.5281/zenodo.8105339). - -## Organizers - -* [Grigori Fursin](https://cKnowledge.org/gfursin) and [Arjun Suresh](https://www.linkedin.com/in/arjunsuresh) - ([MLCommons](https://mlcommons.org), [cTuning.org](https://cTuning.org) and [cKnowledge.org](https://cKnowledge.org)) diff --git a/platform/scripts/1-install-deps-cloud.sh b/platform/scripts/1-install-deps-cloud.sh old mode 100755 new mode 100644 diff --git a/platform/scripts/1-install-deps.sh b/platform/scripts/1-install-deps.sh old mode 100755 new mode 100644 diff --git a/platform/scripts/2-run-in-cloud-nohup.sh b/platform/scripts/2-run-in-cloud-nohup.sh old mode 100755 new mode 100644 diff --git a/platform/scripts/2-run-in-cloud.sh b/platform/scripts/2-run-in-cloud.sh old mode 100755 new mode 100644 diff --git a/platform/scripts/restart_apache2.sh b/platform/scripts/restart_apache2.sh old mode 100755 new mode 100644
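For reference, the `customize.py` changes in this patch drive other CM scripts through `cm.access()`. A minimal sketch of the same call pattern from a standalone Python session; the tags are illustrative, not a prescription:

```python
import cmind

# Run a CM script programmatically, following the cm.access() pattern
# used by the customize.py changes above; the tags are illustrative.
r = cmind.access({'action': 'run',
                  'automation': 'script',
                  'tags': 'get,mlperf,inference,results,dir',
                  'quiet': True})
if r['return'] > 0:
    print('CM error: {}'.format(r.get('error', '')))
```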