diff --git a/ck/CONTRIBUTING.md b/ck/CONTRIBUTING.md index 551cc66a9f..07d85f3537 100644 --- a/ck/CONTRIBUTING.md +++ b/ck/CONTRIBUTING.md @@ -69,5 +69,5 @@ * @filven * @ValouBambou -See more acknowledgments at the end of this [article](https://arxiv.org/abs/2011.01149) +See more acknowledgments at the end of this [article](https://doi.org/10.1098/rsta.2020.0211) describing Collective Knowledge v1 concepts. diff --git a/ck/README.md b/ck/README.md index 56ea9ff51a..e69a9947df 100644 --- a/ck/README.md +++ b/ck/README.md @@ -2,7 +2,7 @@

-**Note that this directory is in archive mode since the [Collective Knowledge framework (v1 and v2)](https://arxiv.org/abs/2011.01149) +**Note that this directory is in archive mode since the [Collective Knowledge framework (v1 and v2)](https://doi.org/10.1098/rsta.2020.0211) is now officially discontinued in favour of the new, light-weight, non-intrusive and technology-agnostic [Collective Mind workflow automation language](https://doi.org/10.5281/zenodo.8105339) being developed, supported and maintained by the [MLCommons](https://mlcommons.org), [cTuning.org](https://cTuning.org) and [cKnowledge.org](https://cKnowledge.org).** @@ -280,5 +280,5 @@ The community provides Docker containers to test CK and components using differe We would like to thank all [contributors](https://github.com/mlcommons/ck/blob/master/CONTRIBUTING.md) and [collaborators](https://cKnowledge.org/partners.html) for their support, fruitful discussions, -and useful feedback! See more acknowledgments in the [CK journal article](https://arxiv.org/abs/2011.01149) +and useful feedback! See more acknowledgments in the [CK journal article](https://doi.org/10.1098/rsta.2020.0211) and [ACM TechTalk'21](https://www.youtube.com/watch?v=7zpeIVwICa4). diff --git a/ck/docs/mlperf-automation/tutorials/tvmcon-2021-automating-mlperf-with-tvm-and-ck.md b/ck/docs/mlperf-automation/tutorials/tvmcon-2021-automating-mlperf-with-tvm-and-ck.md index 7864a30895..190d7dc618 100644 --- a/ck/docs/mlperf-automation/tutorials/tvmcon-2021-automating-mlperf-with-tvm-and-ck.md +++ b/ck/docs/mlperf-automation/tutorials/tvmcon-2021-automating-mlperf-with-tvm-and-ck.md @@ -38,7 +38,7 @@ hardware. * [Apache TVM](https://tvm.apache.org) * CK "plug&play" automation framework: [GitHub](https://github.com/ctuning/ck), [Motivation](https://www.youtube.com/watch?v=7zpeIVwICa4), - [ArXiv](https://arxiv.org/abs/2011.01149), + [journal paper](https://doi.org/10.1098/rsta.2020.0211), [automation actions](https://github.com/mlcommons/ck/tree/master/ck/repo/module), [MLOps components](https://github.com/mlcommons/ck-mlops) * [ACM REQUEST-ASPLOS'18: the 1st Reproducible Tournament on Pareto-efficient Image Classification](https://cknow.io/c/event/repro-request-asplos2018) diff --git a/ck/docs/src/introduction.md b/ck/docs/src/introduction.md index d708d5d66b..c7d4e28e60 100644 --- a/ck/docs/src/introduction.md +++ b/ck/docs/src/introduction.md @@ -2,7 +2,7 @@ ## Project overview -* Philosophical Transactions of the Royal Society: [paper](https://arxiv.org/abs/2011.01149), [shorter pre-print](https://arxiv.org/abs/2006.07161) +* Philosophical Transactions of the Royal Society: [paper](https://doi.org/10.1098/rsta.2020.0211), [shorter pre-print](https://arxiv.org/abs/2006.07161) [](https://youtu.be/7zpeIVwICa4) diff --git a/cm-mlops/automation/cfg/_cm.json b/cm-mlops/automation/cfg/_cm.json new file mode 100644 index 0000000000..27f80fbd40 --- /dev/null +++ b/cm-mlops/automation/cfg/_cm.json @@ -0,0 +1,9 @@ +{ + "alias": "cfg", + "automation_alias": "automation", + "automation_uid": "bbeb15d8f0a944a4", + "tags": [ + "automation" + ], + "uid": "88dce9c160324c5d" +} diff --git a/cm-mlops/automation/cfg/module.py b/cm-mlops/automation/cfg/module.py new file mode 100644 index 0000000000..be8d6e7b1d --- /dev/null +++ b/cm-mlops/automation/cfg/module.py @@ -0,0 +1,52 @@ +import os + +from cmind.automation import Automation +from cmind import utils + +class CAutomation(Automation): + """ + Automation actions + """ + + 
############################################################ + def __init__(self, cmind, automation_file): + super().__init__(cmind, __file__) + + ############################################################ + def test(self, i): + """ + Test automation + + Args: + (CM input dict): + + (out) (str): if 'con', output to console + + automation (str): automation as CM string object + + parsed_automation (list): prepared in CM CLI or CM access function + [ (automation alias, automation UID) ] or + [ (automation alias, automation UID), (automation repo alias, automation repo UID) ] + + (artifact) (str): artifact as CM string object + + (parsed_artifact) (list): prepared in CM CLI or CM access function + [ (artifact alias, artifact UID) ] or + [ (artifact alias, artifact UID), (artifact repo alias, artifact repo UID) ] + + ... + + Returns: + (CM return dict): + + * return (int): return code == 0 if no error and >0 if error + * (error) (str): error string if return>0 + + * Output from this automation action + + """ + + import json + print (json.dumps(i, indent=2)) + + return {'return':0} diff --git a/cm-mlops/automation/script/module.py b/cm-mlops/automation/script/module.py index 46dd3e6978..7fc9d527e1 100644 --- a/cm-mlops/automation/script/module.py +++ b/cm-mlops/automation/script/module.py @@ -1312,6 +1312,27 @@ def run(self, i): if "add_deps_recursive" in versions_meta: self._merge_dicts_with_tags(add_deps_recursive, versions_meta['add_deps_recursive']) + # Run chain of docker dependencies if current run cmd is from inside a docker container + docker_deps = [] + if i.get('docker_run_deps'): + docker_meta = meta.get('docker') + if docker_meta: + docker_deps = docker_meta.get('deps') + docker_deps = [ dep for dep in docker_deps if not dep.get('skip_inside_docker', False) ] + if len(docker_deps)>0: + + if verbose: + print (recursion_spaces + ' - Checking docker run dependencies on other CM scripts:') + + r = self._call_run_deps(docker_deps, self.local_env_keys, local_env_keys_from_meta, env, state, const, const_state, add_deps_recursive, + recursion_spaces + extra_recursion_spaces, + remembered_selections, variation_tags_string, False, debug_script_tags, verbose, show_time, extra_recursion_spaces, run_state) + if r['return']>0: return r + + if verbose: + print (recursion_spaces + ' - Processing env after docker run dependencies ...') + + update_env_with_values(env) # Check chain of dependencies on other CM scripts if len(deps)>0: diff --git a/cm-mlops/automation/script/module_misc.py b/cm-mlops/automation/script/module_misc.py index 1394196560..ef5dbd0e8e 100644 --- a/cm-mlops/automation/script/module_misc.py +++ b/cm-mlops/automation/script/module_misc.py @@ -1379,13 +1379,15 @@ def dockerfile(i): i_run_cmd = r['run_cmd'] + docker_run_cmd_prefix = i.get('docker_run_cmd_prefix', docker_settings.get('run_cmd_prefix', '')) + r = regenerate_script_cmd({'script_uid':script_uid, 'script_alias':script_alias, 'run_cmd':i_run_cmd, 'tags':tags, 'fake_run':True, 'docker_settings':docker_settings, - 'docker_run_cmd_prefix':i.get('docker_run_cmd_prefix','')}) + 'docker_run_cmd_prefix':docker_run_cmd_prefix}) if r['return']>0: return r run_cmd = r['run_cmd_string'] @@ -1469,6 +1471,21 @@ def dockerfile(i): return {'return':0} +def get_container_path(value): + path_split = value.split(os.sep) + if len(path_split) == 1: + return value + + new_value = '' + if "cache" in path_split and "local" in path_split: + new_path_split = [ "", "home", "cmuser" ] + repo_entry_index = path_split.index("local") +
new_path_split += path_split[repo_entry_index:] + return "/".join(new_path_split) + + return value + + ############################################################ def docker(i): """ @@ -1629,6 +1646,7 @@ def docker(i): if c_input in i: env[docker_input_mapping[c_input]] = i[c_input] + container_env_string = '' # env keys corresponding to container mounts are explicitly passed to the container run cmd for index in range(len(mounts)): mount = mounts[index] @@ -1663,7 +1681,8 @@ def docker(i): if tmp_values: for tmp_value in tmp_values: if tmp_value in env: - new_container_mount = env[tmp_value] + new_container_mount = get_container_path(env[tmp_value]) + container_env_string += "--env.{}={} ".format(tmp_value, new_container_mount) else:# we skip those mounts mounts[index] = None skip = True @@ -1694,6 +1713,8 @@ def docker(i): docker_pre_run_cmds = i.get('docker_pre_run_cmds', []) + docker_settings.get('pre_run_cmds', []) + docker_run_cmd_prefix = i.get('docker_run_cmd_prefix', docker_settings.get('run_cmd_prefix', '')) + all_gpus = i.get('docker_all_gpus', docker_settings.get('all_gpus')) device = i.get('docker_device', docker_settings.get('device')) @@ -1702,6 +1723,10 @@ def docker(i): port_maps = i.get('docker_port_maps', docker_settings.get('port_maps', [])) + shm_size = i.get('docker_shm_size', docker_settings.get('shm_size', '')) + + extra_run_args = i.get('docker_extra_run_args', docker_settings.get('extra_run_args', '')) + if detached == '': detached = docker_settings.get('detached', '') @@ -1729,7 +1754,8 @@ def docker(i): 'docker_run_cmd_prefix':i.get('docker_run_cmd_prefix','')}) if r['return']>0: return r - run_cmd = r['run_cmd_string'] + run_cmd = r['run_cmd_string'] + ' ' + container_env_string + ' --docker_run_deps ' + env['CM_RUN_STATE_DOCKER'] = True if docker_settings.get('mount_current_dir','')=='yes': @@ -1781,6 +1807,12 @@ def docker(i): if port_maps: cm_docker_input['port_maps'] = port_maps + if shm_size != '': + cm_docker_input['shm_size'] = shm_size + + if extra_run_args != '': + cm_docker_input['extra_run_args'] = extra_run_args + print ('') diff --git a/cm-mlops/automation/utils/module.py b/cm-mlops/automation/utils/module.py index 33c3381c2b..2b479d5362 100644 --- a/cm-mlops/automation/utils/module.py +++ b/cm-mlops/automation/utils/module.py @@ -851,3 +851,30 @@ def prune_input(self, i): return {'return':0, 'new_input':i_run_cmd_arc} + + ############################################################################## + def uid(self, i): + """ + Generate CM UID. + + Args: + (CM input dict): empty dict + + Returns: + (CM return dict): + + * return (int): return code == 0 if no error and >0 if error + * (error) (str): error string if return>0 + + * uid (str): CM UID + """ + + console = i.get('out') == 'con' + + r = utils.gen_uid() + + if console: + print (r['uid']) + + return r + diff --git a/cm-mlops/challenge/add-derived-metrics-to-mlperf-inference-v3.1/README.md b/cm-mlops/challenge/add-derived-metrics-to-mlperf-inference-v3.1/README.md index 53bb599552..516e9b0695 100644 --- a/cm-mlops/challenge/add-derived-metrics-to-mlperf-inference-v3.1/README.md +++ b/cm-mlops/challenge/add-derived-metrics-to-mlperf-inference-v3.1/README.md @@ -5,9 +5,7 @@ and add derived metrics such as result/No of cores, power efficiency, device cos Add clock speed as a third dimension to graphs and improve Bar graph visualization. 
-Join our public [Discord server](https://discord.gg/JjWNWXKxwT) and/or -our [weekly conf-calls](https://docs.google.com/document/d/1zMNK1m_LhWm6jimZK6YE05hu4VH9usdbKJ3nBy-ZPAw/edit) -to discuss this challenge with the organizers. +Join our public [Discord server](https://discord.gg/JjWNWXKxwT) to discuss this challenge with the organizers. Read [this documentation](https://github.com/mlcommons/ck/blob/master/docs/mlperf/inference/README.md) to run reference implementations of MLPerf inference benchmarks diff --git a/cm-mlops/challenge/connect-mlperf-inference-v3.1-with-openbenchmarking/README.md b/cm-mlops/challenge/connect-mlperf-inference-v3.1-with-openbenchmarking/README.md index ab3024ba32..30b48b8060 100644 --- a/cm-mlops/challenge/connect-mlperf-inference-v3.1-with-openbenchmarking/README.md +++ b/cm-mlops/challenge/connect-mlperf-inference-v3.1-with-openbenchmarking/README.md @@ -2,9 +2,7 @@ Connect CM workflows to run MLPerf inference benchmarks with [OpenBenchmarking.org](https://openbenchmarking.org). -Join our public [Discord server](https://discord.gg/JjWNWXKxwT) and/or -our [weekly conf-calls](https://docs.google.com/document/d/1zMNK1m_LhWm6jimZK6YE05hu4VH9usdbKJ3nBy-ZPAw/edit) -to discuss this challenge with the organizers. +Join our public [Discord server](https://discord.gg/JjWNWXKxwT) to discuss this challenge with the organizers. Read [this documentation](https://github.com/mlcommons/ck/blob/master/docs/mlperf/inference/README.md) to run reference implementations of MLPerf inference benchmarks diff --git a/cm-mlops/challenge/connect-mlperf-inference-v3.1-with-openbenchmarking/_cm.json b/cm-mlops/challenge/connect-mlperf-inference-v3.1-with-openbenchmarking/_cm.json index 72b95bdbe9..aa291ba0e8 100644 --- a/cm-mlops/challenge/connect-mlperf-inference-v3.1-with-openbenchmarking/_cm.json +++ b/cm-mlops/challenge/connect-mlperf-inference-v3.1-with-openbenchmarking/_cm.json @@ -2,7 +2,7 @@ "alias": "connect-mlperf-inference-v3.1-with-openbenchmarking", "automation_alias": "challenge", "automation_uid": "3d84abd768f34e08", - "date_open": "20230704", + "date_open": "20240204", "date_close_extension": true, "points": 2, "prize_short": "co-authoring white paper", @@ -15,11 +15,7 @@ "automate", "openbenchmarking", "mlperf-inference", - "mlperf-inference-openbenchmarking", - "mlperf-inference-openbenchmarking", - "mlperf-inference-openbenchmarking-v3.1", - "mlperf-inference-openbenchmarking-v3.1-2023", - "v3.1" + "mlperf-inference-openbenchmarking" ], "title": "Run MLPerf inference benchmarks via OpenBenchmarking.org", "trophies": true, diff --git a/cm-mlops/challenge/connect-mlperf-with-medperf/README.md b/cm-mlops/challenge/connect-mlperf-with-medperf/README.md index 4277eba2e1..e1d2dfb6bb 100644 --- a/cm-mlops/challenge/connect-mlperf-with-medperf/README.md +++ b/cm-mlops/challenge/connect-mlperf-with-medperf/README.md @@ -6,9 +6,7 @@ using MLPerf loadgen and MLCommons CM automation language. See the [Nature 2023 article about MedPerf](https://www.nature.com/articles/s42256-023-00652-2) and [ACM REP'23 keynote about CM](https://doi.org/10.5281/zenodo.8105339) to learn more about these projects. -Join our public [Discord server](https://discord.gg/JjWNWXKxwT) and/or -our [weekly conf-calls](https://docs.google.com/document/d/1zMNK1m_LhWm6jimZK6YE05hu4VH9usdbKJ3nBy-ZPAw/edit) -to discuss this challenge with the organizers. +Join our public [Discord server](https://discord.gg/JjWNWXKxwT) to discuss this challenge with the organizers. 
Read [this documentation](https://github.com/mlcommons/ck/blob/master/docs/mlperf/inference/README.md) to run reference implementations of MLPerf inference benchmarks diff --git a/cm-mlops/challenge/optimize-mlperf-inference-scc2023/_cm.json b/cm-mlops/challenge/optimize-mlperf-inference-scc2023/_cm.json index 335020e0b4..868d404d50 100644 --- a/cm-mlops/challenge/optimize-mlperf-inference-scc2023/_cm.json +++ b/cm-mlops/challenge/optimize-mlperf-inference-scc2023/_cm.json @@ -2,7 +2,7 @@ "alias": "optimize-mlperf-inference-scc2023", "automation_alias": "challenge", "automation_uid": "3d84abd768f34e08", - "_date_close": "20231115", + "date_close": "20231115", "date_open": "20230915", "tags": [ "automate", diff --git a/cm-mlops/challenge/optimize-mlperf-inference-v3.1-android/README.md b/cm-mlops/challenge/optimize-mlperf-inference-v3.1-android/README.md deleted file mode 100644 index 1af3d813a0..0000000000 --- a/cm-mlops/challenge/optimize-mlperf-inference-v3.1-android/README.md +++ /dev/null @@ -1,38 +0,0 @@ -### Challenge - -Some MLPerf inference benchmarks for the Edge (image classification, object detection, etc) are possible to run on Android devices. - -Add support to cross-compile our TFLite C++ implementation via CM to run some MLPerf inference benchmarks on any Android mobile phone. - -Join our public [Discord server](https://discord.gg/JjWNWXKxwT) and/or -our [weekly conf-calls](https://docs.google.com/document/d/1zMNK1m_LhWm6jimZK6YE05hu4VH9usdbKJ3nBy-ZPAw/edit) -to discuss this challenge with the organizers. - -Read [this documentation](https://github.com/mlcommons/ck/blob/master/docs/mlperf/inference/README.md) -to run reference implementations of MLPerf inference benchmarks -using the CM automation language and use them as a base for your developments. - -Check [this ACM REP'23 keynote](https://doi.org/10.5281/zenodo.8105339) to learn more about our open-source project and long-term vision. - - -### Prizes - -* *All contributors will participate in writing a common white paper about running and comparing MLPerf inference benchmarks out-of-the-box.* -* *All contributors will receive an official MLCommons Collective Knowledge contributor award (see [this example](https://ctuning.org/awards/ck-award-202307-zhu.pdf)).* -* *The top contributors will receive cash prizes from [MLCommons organizations](https://mlcommons.org) and [cKnowledge.org](https://www.linkedin.com/company/cknowledge)*. - - -### Organizers - -* [MLCommons](https://cKnowledge.org/mlcommons-taskforce) -* [cTuning.org](https://www.linkedin.com/company/ctuning-foundation) -* [cKnowledge.org](https://www.linkedin.com/company/cknowledge) - - - -### Results - -All accepted results will be publicly available in the CM format with derived metrics -in this [MLCommons repository](https://github.com/mlcommons/ck_mlperf_results), -in [MLCommons Collective Knowledge explorer](https://access.cknowledge.org/playground/?action=experiments) -and at official [MLCommons website](https://mlcommons.org). 
diff --git a/cm-mlops/challenge/optimize-mlperf-inference-v3.1-android/_cm.json b/cm-mlops/challenge/optimize-mlperf-inference-v3.1-android/_cm.json deleted file mode 100644 index 8f3ca9f460..0000000000 --- a/cm-mlops/challenge/optimize-mlperf-inference-v3.1-android/_cm.json +++ /dev/null @@ -1,29 +0,0 @@ -{ - "alias": "optimize-mlperf-inference-v3.1-android", - "automation_alias": "challenge", - "automation_uid": "3d84abd768f34e08", - "date_open": "20230704", - "date_close_extension": true, - "experiments": [], - "points": 2, - "prize": "300$ for the first implementation", - "prize_short": "co-authoring white paper , $$$", - "tags": [ - "modularize", - "optimize", - "reproduce", - "replicate", - "automate", - "benchmark", - "android", - "mlperf-inference", - "mlperf-inference-android", - "mlperf-inference-android", - "mlperf-inference-android-v3.1", - "mlperf-inference-android-v3.1-2023", - "v3.1" - ], - "title": "Add support to run some MLPerf inference benchmarks for Edge on Android mobile phone", - "trophies": true, - "uid": "3ec574afcc594574" -} diff --git a/cm-mlops/challenge/optimize-mlperf-inference-v3.1-create-end-to-end-app/README.md b/cm-mlops/challenge/optimize-mlperf-inference-v3.1-create-end-to-end-app/README.md index 4dabd3ae3c..843c205e69 100644 --- a/cm-mlops/challenge/optimize-mlperf-inference-v3.1-create-end-to-end-app/README.md +++ b/cm-mlops/challenge/optimize-mlperf-inference-v3.1-create-end-to-end-app/README.md @@ -3,9 +3,7 @@ Create any end-to-end AI application with web cam, speech recognition, chat bot, LLM that uses any MLPerf model and CM automation. -Join our public [Discord server](https://discord.gg/JjWNWXKxwT) and/or -our [weekly conf-calls](https://docs.google.com/document/d/1zMNK1m_LhWm6jimZK6YE05hu4VH9usdbKJ3nBy-ZPAw/edit) -to discuss this challenge with the organizers. +Join our public [Discord server](https://discord.gg/JjWNWXKxwT) to discuss this challenge with the organizers. Check [this ACM REP'23 keynote](https://doi.org/10.5281/zenodo.8105339) to learn more about our open-source project and long-term vision. @@ -16,7 +14,6 @@ Looking forward to your submissions and happy hacking! * *All submitters will participate in writing a common white paper about running and comparing MLPerf inference benchmarks out-of-the-box.* * *All submitters will receive 1 point for submitting valid results for 1 complete benchmark on one system.* * *All submitters will receive an official MLCommons Collective Knowledge contributor award (see [this example](https://ctuning.org/awards/ck-award-202307-zhu.pdf)).* -* *The top contributors will receive cash prizes from [MLCommons organizations](https://mlcommons.org) and [cKnowledge.org](https://www.linkedin.com/company/cknowledge)*. ### Organizers diff --git a/cm-mlops/challenge/optimize-mlperf-inference-v3.1-intel-2023/README.md b/cm-mlops/challenge/optimize-mlperf-inference-v3.1-intel-2023/README.md index 40e0949a51..c4d8636579 100644 --- a/cm-mlops/challenge/optimize-mlperf-inference-v3.1-intel-2023/README.md +++ b/cm-mlops/challenge/optimize-mlperf-inference-v3.1-intel-2023/README.md @@ -5,9 +5,7 @@ Add CM interface to run MLPerf inference benchmarks on Intel-based platforms. You can start from reproducing any past MLPerf inference submission from Intel and their partners and then adding CM automation. 
-Join our public [Discord server](https://discord.gg/JjWNWXKxwT) and/or -our [weekly conf-calls](https://docs.google.com/document/d/1zMNK1m_LhWm6jimZK6YE05hu4VH9usdbKJ3nBy-ZPAw/edit) -to discuss this challenge with the organizers. +Join our public [Discord server](https://discord.gg/JjWNWXKxwT) to discuss this challenge with the organizers. Read [this documentation](https://github.com/mlcommons/ck/blob/master/docs/mlperf/inference/README.md) to run reference implementations of MLPerf inference benchmarks diff --git a/cm-mlops/challenge/optimize-mlperf-inference-v3.1-intel-2023/_cm.json b/cm-mlops/challenge/optimize-mlperf-inference-v3.1-intel-2023/_cm.json index 11ce75a3c6..ccdd440a70 100644 --- a/cm-mlops/challenge/optimize-mlperf-inference-v3.1-intel-2023/_cm.json +++ b/cm-mlops/challenge/optimize-mlperf-inference-v3.1-intel-2023/_cm.json @@ -2,7 +2,7 @@ "alias": "optimize-mlperf-inference-v3.1-intel-2023", "automation_alias": "challenge", "automation_uid": "3d84abd768f34e08", - "date_close_extension": true, + "date_close": "20240104", "date_open": "20230704", "points": 2, "prize": "200$ for the first implementation and 200 for the fastest implementation", diff --git a/cm-mlops/challenge/optimize-mlperf-inference-v3.1-qualcomm-ai100-2023/_cm.json b/cm-mlops/challenge/optimize-mlperf-inference-v3.1-qualcomm-ai100-2023/_cm.json index 855c66b66b..350749fc6c 100644 --- a/cm-mlops/challenge/optimize-mlperf-inference-v3.1-qualcomm-ai100-2023/_cm.json +++ b/cm-mlops/challenge/optimize-mlperf-inference-v3.1-qualcomm-ai100-2023/_cm.json @@ -2,7 +2,7 @@ "alias": "optimize-mlperf-inference-v3.1-qualcomm-ai100-2023", "automation_alias": "challenge", "automation_uid": "3d84abd768f34e08", - "date_close_extension": true, + "date_close": "20240104", "date_open": "20230704", "points":3, "trophies":true, diff --git a/cm-mlops/challenge/optimize-mlperf-inference-v4.0-2024/README.md b/cm-mlops/challenge/optimize-mlperf-inference-v4.0-2024/README.md new file mode 100644 index 0000000000..3e0a705411 --- /dev/null +++ b/cm-mlops/challenge/optimize-mlperf-inference-v4.0-2024/README.md @@ -0,0 +1 @@ +Under preparation. Contact the [MLCommons Task Force on Automation and Reproducibility](https://github.com/mlcommons/ck/blob/master/docs/taskforce.md) for more details. diff --git a/cm-mlops/challenge/optimize-mlperf-inference-v4.0-2024/_cm.yaml b/cm-mlops/challenge/optimize-mlperf-inference-v4.0-2024/_cm.yaml new file mode 100644 index 0000000000..63ca123e47 --- /dev/null +++ b/cm-mlops/challenge/optimize-mlperf-inference-v4.0-2024/_cm.yaml @@ -0,0 +1,27 @@ +alias: optimize-mlperf-inference-v4.0-2024 +uid: e6b8738383eb46d0 + +automation_alias: challenge +automation_uid: 3d84abd768f34e08 + +title: Run and optimize MLPerf inference v4.0 benchmarks (Intel, Nvidia, Qualcomm, Arm64, TPU ...) 
and submit official results + +date_close: '20240225' +date_open: '20240205' + +tags: +- modularize +- optimize +- reproduce +- replicate +- automate +- benchmark +- mlperf +- mlperf-inference +- mlperf-inference-v4.0 +- mlperf-inference-v4.0-2024 +- v4.0 + + +experiments: +- tags: mlperf-inference,v4.0 diff --git a/cm-mlops/challenge/participate-hipeac-reproducibilty-challenge-2024/_cm.json b/cm-mlops/challenge/participate-hipeac-reproducibilty-challenge-2024/_cm.json index 9c9774ed6b..36016c454a 100644 --- a/cm-mlops/challenge/participate-hipeac-reproducibilty-challenge-2024/_cm.json +++ b/cm-mlops/challenge/participate-hipeac-reproducibilty-challenge-2024/_cm.json @@ -3,6 +3,7 @@ "alias": "participate-hipeac-reproducibilty-challenge-2024", "automation_alias": "challenge", "automation_uid": "3d84abd768f34e08", + "date_close": "20231115", "date_open": "20230915", "tags": [ "participate", diff --git a/cm-mlops/challenge/repro-micro2023/_cm.json b/cm-mlops/challenge/repro-micro2023/_cm.json index cbfae1f1fd..77fb5f773b 100644 --- a/cm-mlops/challenge/repro-micro2023/_cm.json +++ b/cm-mlops/challenge/repro-micro2023/_cm.json @@ -2,7 +2,7 @@ "alias": "repro-micro2023", "automation_alias": "challenge", "automation_uid": "3d84abd768f34e08", - "_date_close": "20230915", + "date_close": "20230915", "date_open": "20230702", "tags": [ "reproduce", diff --git a/cm-mlops/challenge/reproduce-automate-explain-past-mlperf-inference-results-2023/README.md b/cm-mlops/challenge/reproduce-automate-explain-past-mlperf-inference-results-2023/README.md index dc2339ff49..13c51af648 100644 --- a/cm-mlops/challenge/reproduce-automate-explain-past-mlperf-inference-results-2023/README.md +++ b/cm-mlops/challenge/reproduce-automate-explain-past-mlperf-inference-results-2023/README.md @@ -10,9 +10,7 @@ them by the community. A detailed experience report must be provided - if accepted, you will be able to present your findings at the upcoming HiPEAC'23 workshop and our upcoming white paper with MLCommons. -Join our public [Discord server](https://discord.gg/JjWNWXKxwT) and/or -our [weekly conf-calls](https://docs.google.com/document/d/1zMNK1m_LhWm6jimZK6YE05hu4VH9usdbKJ3nBy-ZPAw/edit) -to discuss this challenge with the organizers. +Join our public [Discord server](https://discord.gg/JjWNWXKxwT) to discuss this challenge with the organizers. 
Read [this documentation](https://github.com/mlcommons/ck/blob/master/docs/mlperf/inference/README.md) to run reference implementations of MLPerf inference benchmarks @@ -23,7 +21,7 @@ Check [this ACM REP'23 keynote](https://doi.org/10.5281/zenodo.8105339) to learn ### Prizes * *All contributors will be able to present their findings at the HiPEAC workshop on reproducibility and participate in writing a common white paper about running and comparing MLPerf inference benchmarks.* -* *All contributors will receive 2 point* +* *All contributors will receive 2 points* * *All contributors will receive an official MLCommons Collective Knowledge contributor award (see [this example](https://ctuning.org/awards/ck-award-202307-zhu.pdf)).* diff --git a/cm-mlops/challenge/train-llm-for-cm-mlperf-2023/README.md b/cm-mlops/challenge/train-llm-for-cm-mlperf-2023/README.md index 11d7d330e2..9449b02a60 100644 --- a/cm-mlops/challenge/train-llm-for-cm-mlperf-2023/README.md +++ b/cm-mlops/challenge/train-llm-for-cm-mlperf-2023/README.md @@ -3,9 +3,7 @@ Improve the prototype of our LLM-based assistant to suggest users how to run MLPerf inference benchmarks using the MLCommons CM automation language: https://access.cknowledge.org/assistant . -Join our public [Discord server](https://discord.gg/JjWNWXKxwT) and/or -our [weekly conf-calls](https://docs.google.com/document/d/1zMNK1m_LhWm6jimZK6YE05hu4VH9usdbKJ3nBy-ZPAw/edit) -to discuss this challenge with the organizers. +Join our public [Discord server](https://discord.gg/JjWNWXKxwT) to discuss this challenge with the organizers. Read [this documentation](https://github.com/mlcommons/ck/blob/master/docs/mlperf/inference/README.md) to run reference implementations of MLPerf inference benchmarks diff --git a/cm-mlops/challenge/train-llm-for-cm-mlperf-2023/_cm.json b/cm-mlops/challenge/train-llm-for-cm-mlperf-2023/_cm.json index 287b07664e..7b498af3ed 100644 --- a/cm-mlops/challenge/train-llm-for-cm-mlperf-2023/_cm.json +++ b/cm-mlops/challenge/train-llm-for-cm-mlperf-2023/_cm.json @@ -2,6 +2,7 @@ "alias": "train-llm-for-cm-mlperf-2023", "automation_alias": "challenge", "automation_uid": "3d84abd768f34e08", + "date_close_extension": true, "date_open": "20230704", "experiments": [], "points": 3, diff --git a/cm-mlops/script/app-mlperf-inference-reference/customize.py b/cm-mlops/script/app-mlperf-inference-reference/customize.py index c22344df03..c0563ec0d6 100644 --- a/cm-mlops/script/app-mlperf-inference-reference/customize.py +++ b/cm-mlops/script/app-mlperf-inference-reference/customize.py @@ -293,6 +293,7 @@ def get_run_cmd_reference(os_info, env, scenario_extra_options, mode_extra_optio env['CM_MLPERF_LOADGEN_EXTRA_OPTIONS'] + \ scenario_extra_options + mode_extra_options + \ " --output-log-dir " + env['CM_MLPERF_OUTPUT_DIR'] + \ + ' --dtype ' + env['CM_MLPERF_MODEL_PRECISION'] + \ " --model-path " + env['MODEL_DIR'] elif "3d-unet" in env['CM_MODEL']: diff --git a/cm-mlops/script/app-mlperf-inference/README.md b/cm-mlops/script/app-mlperf-inference/README.md index c9688ea042..a4da99e6d8 100644 --- a/cm-mlops/script/app-mlperf-inference/README.md +++ b/cm-mlops/script/app-mlperf-inference/README.md @@ -187,6 +187,7 @@ ___ - *CM_MLPERF_IMPLEMENTATION*: `nvidia-original` - *CM_SQUAD_ACCURACY_DTYPE*: `float16` - *CM_IMAGENET_ACCURACY_DTYPE*: `int32` + - *CM_CNNDM_ACCURACY_DTYPE*: `int32` - *CM_LIBRISPEECH_ACCURACY_DTYPE*: `int8` - Workflow: 1. 
***Read "deps" on other CM scripts*** diff --git a/cm-mlops/script/app-mlperf-inference/_cm.yaml b/cm-mlops/script/app-mlperf-inference/_cm.yaml index e517c90d21..994eb72f71 100644 --- a/cm-mlops/script/app-mlperf-inference/_cm.yaml +++ b/cm-mlops/script/app-mlperf-inference/_cm.yaml @@ -229,10 +229,13 @@ variations: tags: _float16 librispeech-accuracy-script: tags: _int8 + cnndm-accuracy-script: + tags: _int32 env: CM_MLPERF_IMPLEMENTATION: nvidia-original CM_SQUAD_ACCURACY_DTYPE: float16 CM_IMAGENET_ACCURACY_DTYPE: int32 + CM_CNNDM_ACCURACY_DTYPE: int32 CM_LIBRISPEECH_ACCURACY_DTYPE: int8 deps: - tags: get,cuda-devices @@ -1032,8 +1035,6 @@ invalid_variation_combinations: - gptj - tf -gui_title: "CM GUI for the MLPerf inference benchmark" - input_description: scenario: desc: "MLPerf inference scenario" @@ -1093,3 +1094,6 @@ input_description: desc: "Generate README with the reproducibility report" debug: desc: "Debug MLPerf script" + +gui: + title: "CM GUI for the MLPerf inference benchmark" diff --git a/cm-mlops/script/build-mlperf-inference-server-nvidia/_cm.yaml b/cm-mlops/script/build-mlperf-inference-server-nvidia/_cm.yaml index 2a618e9ba3..f990b93e9a 100644 --- a/cm-mlops/script/build-mlperf-inference-server-nvidia/_cm.yaml +++ b/cm-mlops/script/build-mlperf-inference-server-nvidia/_cm.yaml @@ -189,21 +189,21 @@ versions: nvidia-inference-common-code: version: r2.1 nvidia-scratch-space: - tags: version.2_1 + tags: _version.2_1 r3.0: add_deps_recursive: nvidia-inference-common-code: version: r3.0 nvidia-scratch-space: - tags: version.3_0 + tags: _version.3_0 r3.1: add_deps_recursive: nvidia-inference-common-code: version: r3.1 nvidia-scratch-space: - tags: version.3_1 + tags: _version.4_0 deps: - tags: install,nccl,libs,_cuda - tags: install,pytorch,from.src,_for-nvidia-mlperf-inference-v3.1-gptj @@ -213,6 +213,8 @@ versions: docker: skip_run_cmd: 'no' all_gpus: 'yes' + shm_size: '32gb' + extra_run_args: ' --ulimit memlock=-1 --cap-add SYS_ADMIN --cap-add SYS_TIME' docker_os: ubuntu docker_real_run: False interactive: True @@ -230,8 +232,15 @@ docker: scratch_path: MLPERF_SCRATCH_PATH deps: - tags: get,mlperf,inference,nvidia,scratch,space + - tags: get,mlperf,inference,results,dir + - tags: get,mlperf,inference,submission,dir + pre_run_cmds: + - cd CM/repos/ctuning@mlcommons-ck && git pull + run_cmd_prefix: sudo apt remove -y cmake && cd CM/repos/ctuning@mlcommons-ck && git pull && cd - mounts: - "${{ IMAGENET_PATH }}:/data/imagenet-val" + - "${{ CM_MLPERF_INFERENCE_RESULTS_DIR }}:${{ CM_MLPERF_INFERENCE_RESULTS_DIR }}" + - "${{ CM_MLPERF_INFERENCE_SUBMISSION_DIR }}:${{ CM_MLPERF_INFERENCE_SUBMISSION_DIR }}" - "${{ RESULTS_DIR }}:/home/cmuser/results_dir" - "${{ SUBMISSION_DIR }}:/home/cmuser/submission_dir" - "${{ CM_CUDNN_TAR_FILE_PATH }}:${{ CM_CUDNN_TAR_FILE_PATH }}" diff --git a/cm-mlops/script/generate-mlperf-tiny-submission/README.md b/cm-mlops/script/generate-mlperf-tiny-submission/README.md index 4615fcdaab..cebfa2e361 100644 --- a/cm-mlops/script/generate-mlperf-tiny-submission/README.md +++ b/cm-mlops/script/generate-mlperf-tiny-submission/README.md @@ -151,7 +151,6 @@ ___ - CM script: [set-echo-off-win](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/set-echo-off-win) - CM script: [reproduce-ipol-paper-2022-439](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/reproduce-ipol-paper-2022-439) - CM script: [get-mlperf-inference-nvidia-common-code](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/get-mlperf-inference-nvidia-common-code) - - 
CM script: [install-qaic-compute-sdk-from.src](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/install-qaic-compute-sdk-from.src) - CM script: [destroy-terraform](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/destroy-terraform) - CM script: [get-dataset-cnndm](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/get-dataset-cnndm) - CM script: [build-dockerfile](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/build-dockerfile) @@ -187,7 +186,6 @@ ___ - CM script: [get-google-test](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/get-google-test) - CM script: [get-dataset-criteo](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/get-dataset-criteo) - CM script: [truncate-mlperf-inference-accuracy-log](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/truncate-mlperf-inference-accuracy-log) - - CM script: [install-pytorch-from.src](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/install-pytorch-from.src) - CM script: [get-ml-model-retinanet-nvidia](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/get-ml-model-retinanet-nvidia) - CM script: [reproduce-micro-paper-2023-victima](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/reproduce-micro-paper-2023-victima) - CM script: [process-ae-users](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/process-ae-users) @@ -339,7 +337,6 @@ ___ - CM script: [print-hello-world-py](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/print-hello-world-py) - CM script: [print-hello-world-java](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/print-hello-world-java) - CM script: [app-mlperf-training-reference](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/app-mlperf-training-reference) - - CM script: [install-mlperf-logging-from.src](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/install-mlperf-logging-from.src) - CM script: [get-zephyr-sdk](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/get-zephyr-sdk) - CM script: [get-python3](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/get-python3) - CM script: [reproduce-ieee-acm-micro2023-paper-87](https://github.com/ctuning/cm-reproduce-research-projects/tree/master/script/reproduce-ieee-acm-micro2023-paper-87) @@ -384,10 +381,21 @@ ___ - CM script: [test-script5](https://github.com/gfursin/cm-tests/tree/master/script/test-script5) - CM script: [get-preprocessed-dataset-openorca](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/get-preprocessed-dataset-openorca) - CM script: [create-conda-env](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/create-conda-env) - - CM script: [install-ipex-from.src](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/install-ipex-from.src) - - CM script: [install-onednn-from.src](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/install-onednn-from.src) - - CM script: [install-onnxruntime-from.src](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/install-onnxruntime-from.src) - - CM script: [install-transformers-from.src](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/install-transformers-from.src) + - CM script: [install-qaic-compute-sdk-from-src](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/install-qaic-compute-sdk-from-src) + - CM script: [fail](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/fail) + - CM script: 
[install-onnxruntime-from-src](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/install-onnxruntime-from-src) + - CM script: [install-ipex-from-src](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/install-ipex-from-src) + - CM script: [install-mlperf-logging-from-src](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/install-mlperf-logging-from-src) + - CM script: [install-onednn-from-src](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/install-onednn-from-src) + - CM script: [install-pytorch-from-src](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/install-pytorch-from-src) + - CM script: [install-transformers-from-src](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/install-transformers-from-src) + - CM script: [create-patch](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/create-patch) + - CM script: [my-script](my-script) + - CM script: [get-mlperf-inference-intel-scratch-space](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/get-mlperf-inference-intel-scratch-space) + - CM script: [get-mlperf-inference-results-dir](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/get-mlperf-inference-results-dir) + - CM script: [get-mlperf-inference-submission-dir](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/get-mlperf-inference-submission-dir) + - CM script: [install-nccl-libs](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/install-nccl-libs) + - CM script: [install-pytorch-kineto-from-src](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/install-pytorch-kineto-from-src) ___ diff --git a/cm-mlops/script/get-cudnn/README.md b/cm-mlops/script/get-cudnn/README.md index b1838fb0c6..7e91089015 100644 --- a/cm-mlops/script/get-cudnn/README.md +++ b/cm-mlops/script/get-cudnn/README.md @@ -26,6 +26,9 @@ ### About + +See extra [notes](README-extra.md) from the authors and contributors. 
+ #### Summary * Category: *CUDA automation.* @@ -165,11 +168,13 @@ ___ * `+DYLD_FALLBACK_LIBRARY_PATH` * `+LD_LIBRARY_PATH` * `+PATH` +* `CM_CUDA_PATH_INCLUDE_CUDNN` * `CM_CUDA_PATH_LIB_CUDNN` * `CM_CUDA_PATH_LIB_CUDNN_EXISTS` * `CM_CUDNN_*` #### New environment keys auto-detected from customize +* `CM_CUDA_PATH_INCLUDE_CUDNN` * `CM_CUDA_PATH_LIB_CUDNN` * `CM_CUDA_PATH_LIB_CUDNN_EXISTS` * `CM_CUDNN_VERSION` diff --git a/cm-mlops/script/get-cudnn/_cm.json b/cm-mlops/script/get-cudnn/_cm.json index e067bf629a..f339286f7a 100644 --- a/cm-mlops/script/get-cudnn/_cm.json +++ b/cm-mlops/script/get-cudnn/_cm.json @@ -48,6 +48,5 @@ ], "uid": "d73ee19baee14df8", "docker": { - "run": false } } diff --git a/cm-mlops/script/get-mlperf-inference-loadgen/README.md b/cm-mlops/script/get-mlperf-inference-loadgen/README.md index 04e049ae99..73229afa74 100644 --- a/cm-mlops/script/get-mlperf-inference-loadgen/README.md +++ b/cm-mlops/script/get-mlperf-inference-loadgen/README.md @@ -170,9 +170,9 @@ ___ * get,compiler * `if (CM_HOST_OS_TYPE != windows)` * CM names: `--adr.['compiler']...` + - CM script: [get-llvm](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/get-llvm) - CM script: [get-cl](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/get-cl) - CM script: [get-gcc](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/get-gcc) - - CM script: [get-llvm](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/get-llvm) * get,cl * `if (CM_HOST_OS_TYPE == windows)` * CM names: `--adr.['compiler']...` diff --git a/cm-mlops/script/get-mlperf-inference-nvidia-common-code/README.md b/cm-mlops/script/get-mlperf-inference-nvidia-common-code/README.md index 208bc78ca6..7dfd1d4c71 100644 --- a/cm-mlops/script/get-mlperf-inference-nvidia-common-code/README.md +++ b/cm-mlops/script/get-mlperf-inference-nvidia-common-code/README.md @@ -141,6 +141,7 @@ ___ * get,mlperf,inference,results * CM names: `--adr.['mlperf-inference-results']...` - CM script: [get-mlperf-inference-results](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/get-mlperf-inference-results) + - CM script: [get-mlperf-inference-results-dir](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/get-mlperf-inference-results-dir) 1. ***Run "preprocess" function from [customize.py](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/get-mlperf-inference-nvidia-common-code/customize.py)*** 1. Read "prehook_deps" on other CM scripts from [meta](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/get-mlperf-inference-nvidia-common-code/_cm.json) 1. 
***Run native script if exists*** diff --git a/cm-mlops/script/get-tensorrt/_cm.json b/cm-mlops/script/get-tensorrt/_cm.json index 174feeb9aa..b1aa40e06a 100644 --- a/cm-mlops/script/get-tensorrt/_cm.json +++ b/cm-mlops/script/get-tensorrt/_cm.json @@ -47,6 +47,5 @@ } }, "docker": { - "run": false } } diff --git a/cm-mlops/script/gui/app.py b/cm-mlops/script/gui/app.py index 0db457dd85..f732599bc1 100644 --- a/cm-mlops/script/gui/app.py +++ b/cm-mlops/script/gui/app.py @@ -8,14 +8,7 @@ def main(): var1 = '^' if os.name == 'nt' else '\\' - compatibility = False - try: - query_params = st.query_params - except: - compatibility = True - - if compatibility: - query_params = st.experimental_get_query_params() + query_params = misc.get_params(st) script_path = os.environ.get('CM_GUI_SCRIPT_PATH','') script_alias = os.environ.get('CM_GUI_SCRIPT_ALIAS','') @@ -50,6 +43,7 @@ def main(): if len(lst)==1: script = lst[0] meta = script.meta + script_path = script.path script_alias = meta['alias'] @@ -63,11 +57,22 @@ def main(): script_path = script.path script_alias = meta['alias'] - if meta.get('gui_title','')!='': - title = meta['gui_title'] + gui_meta = meta.get('gui',{}) + + gui_func = gui_meta.get('use_customize_func', '') + if gui_func!='': + ii = {'streamlit_module':st, + 'meta':meta} + return cmind.utils.call_internal_module(None, os.path.join(script_path, 'dummy') , + 'customize', gui_func, ii) + + + if gui_meta.get('title','')!='': + title = gui_meta['title'] + # Set title - st.title('Collective Mind GUI') + st.title('[Collective Mind](https://github.com/mlcommons/ck)') if script_alias!='': st.markdown('*CM script: "{}"*'.format(script_alias)) @@ -136,33 +141,34 @@ def main(): # Prepare variation_groups # st.markdown("""---""") - st.subheader('Script variations') - - variation_groups_order = meta.get('variation_groups_order',[]) - for variation in sorted(variation_groups): - if variation not in variation_groups_order: - variation_groups_order.append(variation) - - for group_key in variation_groups_order: - group_key_cap = group_key.replace('-',' ').capitalize() - if not group_key.startswith('*'): - y = [''] - - index = 0 - selected_index = 0 - for variation_key in sorted(variation_groups[group_key]): - index += 1 - y.append(variation_key) - if variation_key in default_variations: - selected_index=index - - st_variations['~'+group_key] = st.selectbox(group_key_cap, sorted(y), index=selected_index, key='~'+group_key) - elif group_key == '*no-group*': - for variation_key in sorted(variation_groups[group_key]): - x = False - if variation_key in default_variations: - x=True - st_variations['#'+variation_key] = st.checkbox(variation_key.capitalize(), key='#'+variation_key, value=x) + if len(variations)>0: + st.subheader('Script variations') + + variation_groups_order = meta.get('variation_groups_order',[]) + for variation in sorted(variation_groups): + if variation not in variation_groups_order: + variation_groups_order.append(variation) + + for group_key in variation_groups_order: + group_key_cap = group_key.replace('-',' ').capitalize() + if not group_key.startswith('*'): + y = [''] + + index = 0 + selected_index = 0 + for variation_key in sorted(variation_groups[group_key]): + index += 1 + y.append(variation_key) + if variation_key in default_variations: + selected_index=index + + st_variations['~'+group_key] = st.selectbox(group_key_cap, sorted(y), index=selected_index, key='~'+group_key) + elif group_key == '*no-group*': + for variation_key in sorted(variation_groups[group_key]): + x = False + 
if variation_key in default_variations: + x=True + st_variations['#'+variation_key] = st.checkbox(variation_key.capitalize(), key='#'+variation_key, value=x) # Prepare inputs @@ -293,7 +299,7 @@ def main(): if y!='': x+=y - st.text_area('**Install [CM interface](https://github.com/mlcommons/ck) with a few dependencies:**', x, height=170) + st.text_area('**Install [MLCommons CM](https://github.com/mlcommons/ck/blob/master/docs/installation.md) with a few dependencies:**', x, height=170) st.markdown("**Run CM script from Python:**") diff --git a/cm-mlops/script/gui/graph.py b/cm-mlops/script/gui/graph.py index 6d779e2d4c..409e350c5b 100644 --- a/cm-mlops/script/gui/graph.py +++ b/cm-mlops/script/gui/graph.py @@ -60,14 +60,8 @@ def __init__(self, points, targets=None): def main(): - compatibility = False - try: - params = st.query_params - except: - compatibility = True - - if compatibility: - params = st.experimental_get_query_params() + + params = misc.get_params(st) # Set title st.title('CM experiment visualization') diff --git a/cm-mlops/script/gui/misc.py b/cm-mlops/script/gui/misc.py index a72e369b09..757965c2c5 100644 --- a/cm-mlops/script/gui/misc.py +++ b/cm-mlops/script/gui/misc.py @@ -1,5 +1,6 @@ # Support functions +########################################################## def make_url(name, alias='', action='contributors', key='name', md=True): import urllib @@ -17,6 +18,7 @@ def make_url(name, alias='', action='contributors', key='name', md=True): return md +########################################################## def convert_date(date): # date: format YYYYMMDD to YYYY month day @@ -30,3 +32,23 @@ def convert_date(date): return {'return':1, 'error':'date "{}" is not of format YYYYMMDD: {}'.format(date, format(e))} return {'return':0, 'string':year+' '+month+' '+day} + +########################################################## +def get_params(st): + compatibility = False + + try: + params2 = st.query_params + # Convert to old style + params = {} + for k in params2: + v = params2[k] + if type(v)!=list: + params[k]=[v] + except: + compatibility = True + + if compatibility: + params = st.experimental_get_query_params() + + return params diff --git a/cm-mlops/script/gui/playground.py b/cm-mlops/script/gui/playground.py index 081bd48341..f994e6eb51 100644 --- a/cm-mlops/script/gui/playground.py +++ b/cm-mlops/script/gui/playground.py @@ -13,14 +13,7 @@ def main(): st.set_page_config(layout="wide", menu_items={}) - compatibility = False - try: - params = st.query_params - except: - compatibility = True - - if compatibility: - params = st.experimental_get_query_params() + params = misc.get_params(st) # Set style # Green: background:#7fcf6f; @@ -56,8 +49,8 @@ def main(): st.write('''

Collective Knowledge Playground

- -
+
+ Collaborative Benchmarking and Optimization of AI Systems and Applications
{}
@@ -68,6 +61,7 @@ def main(): # Check action and basic menu action = params.get('action',['contributors'])[0].lower() + style_action_howtorun='font-style:italic;font-weight:bold;color:#ffffff' if action=='howtorun' else '' style_action_challenges='font-style:italic;font-weight:bold;color:#ffffff' if action=='challenges' else '' style_action_experiments='font-style:italic;font-weight:bold;color:#ffffff' if action=='experiments' else '' style_action_contributors='font-style:italic;font-weight:bold;color:#ffffff' if action=='contributors' else '' @@ -76,6 +70,7 @@ def main(): st.write('''
+ @@ -85,6 +80,7 @@ def main():
'''.format( + style_action_howtorun, style_action_contributors, style_action_challenges, style_action_experiments, @@ -100,7 +96,10 @@ def main(): r={'return':0} - if action == 'challenges': + if action == 'howtorun': + from playground_howtorun import page + r = page(st, params) + elif action == 'challenges': from playground_challenges import page r = page(st, params) elif action == 'experiments': diff --git a/cm-mlops/script/gui/playground_challenges.py b/cm-mlops/script/gui/playground_challenges.py index f121e317e3..2b61101e36 100644 --- a/cm-mlops/script/gui/playground_challenges.py +++ b/cm-mlops/script/gui/playground_challenges.py @@ -125,12 +125,13 @@ def page(st, params): x = '''

Ongoing reproducibility and optimization challenges

+ Organized by the MLCommons Task Force on Automation and Reproducibility

- -->
''' st.write(x, unsafe_allow_html = True) @@ -204,13 +205,13 @@ def page(st, params): awards += '🏆' - prize = row.get('prize_short','') - if prize!='': - x += '   Prizes from [MLCommons organizations](https://mlcommons.org), [cTuning foundation](https://cTuning.org) and [cKnowledge.org](https:/cKnowledge.org): **{}**\n'.format(prize) - if awards!='': awards+=' , ' - awards += prize - - xrow.append(awards) +# prize = row.get('prize_short','') +# if prize!='': +# x += '   Prizes from [MLCommons organizations](https://mlcommons.org), [cTuning foundation](https://cTuning.org) and [cKnowledge.org](https:/cKnowledge.org): **{}**\n'.format(prize) +# if awards!='': awards+=' , ' +# awards += prize +# +# xrow.append(awards) if x!='': @@ -227,7 +228,7 @@ def page(st, params): import numpy as np df = pd.DataFrame(data, - columns=['Challenge', 'Closing date', 'Extension', 'Contributor award and prizes from MLCommons organizations, cTuning foundation and cKnowledge.org']) + columns=['Challenge', 'Closing date', 'Extension']) df.index+=1 diff --git a/cm-mlops/script/gui/playground_challenges_with_prizes.py b/cm-mlops/script/gui/playground_challenges_with_prizes.py new file mode 100644 index 0000000000..f121e317e3 --- /dev/null +++ b/cm-mlops/script/gui/playground_challenges_with_prizes.py @@ -0,0 +1,456 @@ +# Developer(s): Grigori Fursin + +import cmind +import os +import datetime +import misc + +def page(st, params): + + url_prefix = st.config.get_option('server.baseUrlPath')+'/' + + name = params.get('name',[''])[0].strip() + tags = params.get('tags',[''])[0].lower() + + ii = {'action':'find', + 'automation':'challenge,3d84abd768f34e08'} + + if name!='': + ii['artifact']=name + if tags!='': + ii['tags']=tags + + r = cmind.access(ii) + if r['return']>0: return r + + lst = r['list'] + + end_html = '' + + if len(lst)==0: + st.markdown('Challenges were not found!') + else: + artifact = None + + if len(lst)==1: + artifact = lst[0] + else: + challenges = [] + + date_now = datetime.datetime.now().isoformat() + date_now2 = int(date_now[0:4]+date_now[5:7]+date_now[8:10]) + + ongoing = [] + + for l in sorted(lst, key=lambda x: ( + -int(x.meta.get('date_open','0')), + -int(x.meta.get('date_close','0')), + x.meta.get('title','') + )): + + row = {} + + meta = l.meta + row['uid']= meta['uid'] + + name = meta.get('title', meta['alias']) + + row['name']=name + + for k in ['date_close_extension', 'points', 'trophies', 'prize', 'prize_short', 'skip', 'sort']: + if k in meta: + row[k]=meta[k] + + under_preparation = meta.get('under_preparation', False) + row['under_preparation']=under_preparation + + date_open = meta.get('date_open','') + date_close = meta.get('date_close','') + + s_date_open = '' + if date_open!='': + r = misc.convert_date(date_open) + s_date_open = r['string'] if r['return']==0 else '' + + row['orig_date_open']=date_open + row['date_open']=s_date_open + + s_date_close = '' + if date_close!='': + r = misc.convert_date(date_close) + s_date_close = r['string'] if r['return']==0 else '' + + row['orig_date_close']=date_close + row['date_close']=s_date_close + + diff1 = 0 + diff2 = 0 + + if date_open!='': + diff1 = int(date_open)-int(date_now2) + + if date_close!='': + diff2 = int(date_close)-int(date_now2) + + + prefix = '' + if under_preparation: + prefix = 'Under preparation: ' + else: + if date_open!='' and diff1>0: + prefix = 'Opens on {}: '.format(s_date_open) + else: + if date_close!='': + if diff2<0: + prefix = 'Finished on {}: '.format(s_date_close) + else: + prefix = 'Open and finishes on {}: 
'.format(s_date_close) + else: + prefix = 'Open: '.format(s_date_close) + + + # Check if open challenge even if under preparation + if date_open and (date_close=='' or (diff1<=0 and diff2>0)): + ongoing.append(row) + else: + challenges.append({'prefix':prefix, 'name':name, 'uid':l.meta['uid']}) + + + + + # Show ongoing if open + if len(ongoing)>0: + ind = 1 + + x = ''' +
+

Ongoing reproducibility and optimization challenges

+ +
+ ''' + st.write(x, unsafe_allow_html = True) + + data = [] + + for row in sorted(ongoing, key=lambda row: (int(row.get('orig_date_close', 9999999999)), + row.get('sort', 0), + row.get('name', ''), + row.get('under_preparation', False) + )): + if row.get('skip',False): continue + + xrow = [] + + md = '' + up = row.get('under_preparation', False) + + x = row['name'] + y = '' + if up: + x = x[0].lower() + x[1:] + y = 'Under preparation: ' + + url = url_prefix + '?action=challenges&name={}'.format(row['uid']) +# md += '###### {}) {}[{}]({})\n'.format(str(ind), y, x, url) + + x = ''' +
+ + {}{} + +
+ '''.format(y, url, x).replace('\n','') +# st.write(x, unsafe_allow_html = True) + + xrow.append(x) + + # Assemble info + x='' + + date_close = row.get('date_close','') + y = '' + if date_close!='' and date_close!=None: + x += '   Closing date: **{}**\n'.format(date_close) + y = date_close.replace(' ',' ') + + xrow.append(y) + + y = '' + if row.get('date_close_extension',False): + y = 'until done' + + xrow.append(y) + +# points = row.get('points',0) +# y = '' +# if points>0: +# x += '   Points: **{}**\n'.format(str(points)) +# y = str(points) +# +# xrow.append(y) + + + + awards = '' + + trophies = row.get('trophies',False) + if trophies: + x += '   Trophy: **Yes**\n' + awards += '🏆' + + + prize = row.get('prize_short','') + if prize!='': + x += '   Prizes from [MLCommons organizations](https://mlcommons.org), [cTuning foundation](https://cTuning.org) and [cKnowledge.org](https:/cKnowledge.org): **{}**\n'.format(prize) + if awards!='': awards+=' , ' + awards += prize + + xrow.append(awards) + + + if x!='': + md += '     '+x + +# st.markdown(md) + + + data.append(xrow) + ind+=1 + + + import pandas as pd + import numpy as np + + df = pd.DataFrame(data, + columns=['Challenge', 'Closing date', 'Extension', 'Contributor award and prizes from MLCommons organizations, cTuning foundation and cKnowledge.org']) + + df.index+=1 + +# st.table(df) + st.write(df.to_html(escape=False, justify='left'), unsafe_allow_html=True) + + # Show selector for all +# challenge = st.selectbox('View past benchmarking, optimization, reproducibility and replicability challenges:', +# range(len(challenges)), +# format_func=lambda x: challenges[x], +# index=0, key='challenge') +# +# if challenge>0: +# artifact = artifacts[challenge] + + + + + # Process 1 challenge + if artifact is None: +# st.markdown('#### Past or future challenges:') + + x = ''' +
+

Future or past challenges

+
+ ''' + st.write(x, unsafe_allow_html = True) + + + for c in challenges: + + prefix = c['prefix'] + name = c['name'] + uid = c['uid'] + + url = url_prefix + '?action=challenges&name={}'.format(uid) + + x = ''' +
+ {}) {}{} +
+ '''.format(str(ind), prefix, url, name) + + st.write(x, unsafe_allow_html = True) + + ind+=1 + + + + + + + + + + + + else: + meta = artifact.meta + + name = meta.get('title', meta['alias']) + uid = meta['uid'] + + st.write(''' +
+

Challenge: {}

+
+ '''.format(name), + unsafe_allow_html=True + ) + + end_html='
Self link
'.format(misc.make_url(meta['uid'], action='challenges', md=False)) + + + # Check basic password + password_hash = meta.get('password_hash','') + view = True + if password_hash!='': + view = False + + password = st.text_input("Enter password", type="password", key="password") + + if password!='': + import bcrypt + # TBD: temporal hack to demo password protection for experiments + password_salt = b'$2b$12$ionIRWe5Ft7jkn4y/7C6/e' + password_hash2 = bcrypt.hashpw(password.encode('utf-8'), password_salt) + + if password_hash.encode('utf-8')==password_hash2: + view=True + else: + st.markdown('**Warning:** wrong password') + + if not view: + return {'return':0, 'end_html':end_html} + + + + z = '' + date_open = meta.get('date_open','') + if date_open!='': + # Format YYYYMMDD + r = misc.convert_date(date_open) + if r['return']>0: return r + z+='* **Open date:** {}\n'.format(r['string']) + + date_close = meta.get('date_close','') + if date_close!='': + # Format YYYYMMDD + r = misc.convert_date(date_close) + if r['return']>0: return r + z+='* **Closing date:** {}\n'.format(r['string']) + + if meta.get('trophies', False): + z+='* **MLCommons Collective Knowledge Contributor award:** Yes\n' + + prize_short = meta.get('prize_short','') + if prize_short!='': + z+='* **Prizes:** {}\n'.format(prize_short) + +# prize = meta.get('prize','') +# if prize!='': +# z+='* **Student prizes:** {}\n'.format(prize) + + + urls = meta.get('urls',[]) + url = meta.get('url', '') + + if url!='': urls.append(url) + + if len(urls)>0: + x = '* **External link:** ' + md = '' + if len(urls)>1: + md = '* **External links:**\n' + x=' * ' + + for u in urls: + md+=x+'[{}]({})\n'.format(u,u) + z+=md+'\n' + + + # Check if has linked experiments + experiments = meta.get('experiments',[]) + + if len(experiments)>0: + md = '* **Shared experiments:**\n' + + for e in experiments: + tags = e.get('tags','') + name = e.get('name','') + + if tags!='': + md+=' * '+misc.make_url(tags, action='experiments', key='tags') + elif name!='': + md+=' * '+misc.make_url(name, action='experiments') + + z+=md+'\n' + + st.markdown(z) + + + # Check if has text + path = artifact.path + + for f in ['README.md', 'info.html']: + f1 = os.path.join(path, f) + if os.path.isfile(f1): + r = cmind.utils.load_txt(f1) + if r['return']>0: return r + + s = r['string'] + + st.markdown('---') + + if f.endswith('.html'): + y = s.split('\n') + ss = '' + for x in y: + ss+=x.strip()+'\n' + + st.write(ss, unsafe_allow_html=True) + else: + st.markdown(s) + + break + + # Check associated reports + r=cmind.access({'action':'find', + 'automation':'report,6462ecdba2054467', + 'tags':'challenge-{}'.format(uid)}) + if r['return']>0: return r + + lst = r['list'] + + for l in lst: + report_path = l.path + + f1 = os.path.join(report_path, 'README.md') + if os.path.isfile(f1): + report_meta = l.meta + + report_alias = report_meta['alias'] + report_title = report_meta.get('title','') + + report_name = report_title if report_title!='' else report_alias + + r = cmind.utils.load_txt(f1) + if r['return']>0: return r + + s = r['string'] + + st.markdown('---') + st.markdown('### '+report_name) + + st.markdown(s, unsafe_allow_html=True) + + + + + + + + + return {'return':0, 'end_html':end_html} diff --git a/cm-mlops/script/gui/playground_howtorun.py b/cm-mlops/script/gui/playground_howtorun.py new file mode 100644 index 0000000000..a8e6fef3b7 --- /dev/null +++ b/cm-mlops/script/gui/playground_howtorun.py @@ -0,0 +1,39 @@ +# Developer(s): Grigori Fursin + +import cmind +import os +import misc + 
+import streamlit.components.v1 as components + +import streamlit as st + +announcement = 'Under preparation. Please check the [MLCommons CM automation project](https://github.com/mlcommons/ck) for more details ...' + + +def main(): + params = misc.get_params(st) + + # Set title + st.title('How to run benchmarks') + + st.markdown(announcement) + + + return page(st, params) + + + + +def page(st, params, action = ''): + + st.markdown('----') + st.markdown(announcement) + + url_prefix = st.config.get_option('server.baseUrlPath')+'/' + + st.markdown(url_prefix) + + + + return {'return':0} diff --git a/cm-mlops/script/install-cmake-prebuilt/customize.py b/cm-mlops/script/install-cmake-prebuilt/customize.py index e5ff84eb9d..263e667c47 100644 --- a/cm-mlops/script/install-cmake-prebuilt/customize.py +++ b/cm-mlops/script/install-cmake-prebuilt/customize.py @@ -17,6 +17,12 @@ def preprocess(i): print (recursion_spaces + ' # Requested version: {}'.format(need_version)) + version_split = need_version.split(".") + while len(version_split) < 3: + version_split.append("0") + + need_version = ".".join(version_split) + host_os_bits = env['CM_HOST_OS_BITS'] if os_info['platform'] != 'windows': diff --git a/cm-mlops/script/install-pytorch-from-src/README.md b/cm-mlops/script/install-pytorch-from-src/README.md index d6c8c9fb23..0a74618c8b 100644 --- a/cm-mlops/script/install-pytorch-from-src/README.md +++ b/cm-mlops/script/install-pytorch-from-src/README.md @@ -112,6 +112,10 @@ ___ - *CUDNN_LIBRARY_PATH*: `<<>>` - *CUDNN_INCLUDE_PATH*: `<<>>` - *CUDA_NVCC_EXECUTABLE*: `<<>>` + - *USE_CUDA*: `1` + - *USE_CUDNN*: `1` + - *TORCH_CUDA_ARCH_LIST*: `Ampere Ada Hopper` + - *TORCH_CXX_FLAGS*: `-D_GLIBCXX_USE_CXX11_ABI=1` - Workflow: 1. ***Read "deps" on other CM scripts*** * get,cuda,_cudnn @@ -170,12 +174,8 @@ ___ * CM names: `--adr.['conda-package', 'libstdcxx-ng']...` - CM script: [install-generic-conda-package](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/install-generic-conda-package) * `_for-nvidia-mlperf-inference-v3.1-gptj` - - Environment variables: - - *CM_CONDA_ENV*: `yes` - Workflow: 1. 
***Read "deps" on other CM scripts*** - * get,conda,_name.nvidia - - CM script: [get-conda](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/get-conda) * get,cmake - CM script: [get-cmake](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/get-cmake) * `_sha.#` diff --git a/cm-mlops/script/install-pytorch-from-src/_cm.json b/cm-mlops/script/install-pytorch-from-src/_cm.json index 7225c86ed8..bf9288b7d9 100644 --- a/cm-mlops/script/install-pytorch-from-src/_cm.json +++ b/cm-mlops/script/install-pytorch-from-src/_cm.json @@ -224,17 +224,11 @@ "sha.b5021ba9", "cuda" ], - "env": { - "CM_CONDA_ENV": "yes" - }, "deps": [ - { - "tags": "get,conda,_name.nvidia" - }, { "tags": "get,cmake", - "version_min": "3.18" - } + "version_min": "3.25.0" + } ] }, "cuda": { @@ -248,7 +242,11 @@ "CUDA_HOME": "<<>>", "CUDNN_LIBRARY_PATH": "<<>>", "CUDNN_INCLUDE_PATH": "<<>>", - "CUDA_NVCC_EXECUTABLE": "<<>>" + "CUDA_NVCC_EXECUTABLE": "<<>>", + "USE_CUDA": "1", + "USE_CUDNN": "1", + "TORCH_CUDA_ARCH_LIST": "Ampere Ada Hopper", + "TORCH_CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=1" } } }, diff --git a/cm-mlops/script/install-pytorch-from-src/run.sh b/cm-mlops/script/install-pytorch-from-src/run.sh index cbfbb42673..56c4b4f506 100644 --- a/cm-mlops/script/install-pytorch-from-src/run.sh +++ b/cm-mlops/script/install-pytorch-from-src/run.sh @@ -1,18 +1,17 @@ #!/bin/bash -export PATH=${CM_CONDA_BIN_PATH}:$PATH -export LD_LIBRARY_PATH="" #Don't use conda libs - CUR_DIR=$PWD rm -rf pytorch cp -r ${CM_PYTORCH_SRC_REPO_PATH} pytorch cd pytorch +git submodule sync +git submodule update --init --recursive rm -rf build -python -m pip install -r requirements.txt +${CM_PYTHON_BIN_WITH_PATH} -m pip install -r requirements.txt if [ "${?}" != "0" ]; then exit $?; fi -python setup.py bdist_wheel +${CM_PYTHON_BIN_WITH_PATH} setup.py bdist_wheel test $? -eq 0 || exit $? cd dist -python -m pip install torch-2.*linux_x86_64.whl +${CM_PYTHON_BIN_WITH_PATH} -m pip install torch-2.*linux_x86_64.whl test $? -eq 0 || exit $? 
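For reference, the rebuilt `run.sh` above is normally driven through CM rather than called directly. A minimal sketch of a standalone invocation, assuming a working `cm` installation with this repository registered (the tag string is the one used by the `deps` entries above; anything else here is illustrative):

```bash
# Sketch: build PyTorch from source for the NVIDIA MLPerf inference v3.1 GPT-J workflow.
# CM is expected to resolve Python, CMake, CUDA/cuDNN and the PyTorch sources as dependencies
# and to export CM_PYTHON_BIN_WITH_PATH / CM_PYTORCH_SRC_REPO_PATH for run.sh.
cm run script --tags=install,pytorch,from.src,_for-nvidia-mlperf-inference-v3.1-gptj
```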
diff --git a/cm-mlops/script/install-pytorch-kineto-from-src/_cm.json b/cm-mlops/script/install-pytorch-kineto-from-src/_cm.json new file mode 100644 index 0000000000..d7417150b2 --- /dev/null +++ b/cm-mlops/script/install-pytorch-kineto-from-src/_cm.json @@ -0,0 +1,130 @@ +{ + "alias": "install-pytorch-kineto-from-src", + "automation_alias": "script", + "automation_uid": "5b4e0237da074764", + "cache": true, + "category": "Compiler automation", + "deps": [ + { + "tags": "detect,os" + }, + { + "tags": "detect,cpu" + }, + { + "names": [ + "python", + "python3" + ], + "skip_if_env": { + "CM_CONDA_ENV": [ + "yes" + ] + }, + "tags": "get,python3" + }, + { + "tags": "get,cmake", + "version_min": "3.25.0" + }, + { + "env": { + "CM_GIT_CHECKOUT_PATH_ENV_NAME": "CM_PYTORCH_KINETO_SRC_REPO_PATH" + }, + "extra_cache_tags": "pytorch-kineto,kineto,src,pytorch-kineto-src,pytorch-kineto-src-repo", + "names": [ + "pytorch-kineto-src-repo" + ], + "tags": "get,git,repo", + "update_tags_from_env_with_prefix": { + "_branch.": [ + "CM_GIT_CHECKOUT" + ], + "_repo.": [ + "CM_GIT_URL" + ], + "_sha.": [ + "CM_GIT_CHECKOUT_SHA" + ], + "_tag.": [ + "CM_GIT_CHECKOUT_TAG" + ] + } + } + ], + "env": { + "CM_GIT_URL": "https://github.com/pytorch/kineto" + }, + "name": "Build pytorch kineto from sources", + "new_env_keys": [ + "CM_PYTORCH_KINETO_*" + ], + "prehook_deps": [], + "sort": 1000, + "tags": [ + "install", + "get", + "src", + "from.src", + "pytorch-kineto", + "kineto", + "src-pytorch-kineto" + ], + "uid": "98a4b061712d4483", + "variations": { + "branch.#": { + "env": { + "CM_GIT_CHECKOUT": "#" + } + }, + "cuda": { + "deps": [ + { + "names": [ + "cuda" + ], + "tags": "get,cuda,_cudnn" + } + ], + "env": { + "CUDA_HOME": "<<>>", + "CUDA_NVCC_EXECUTABLE": "<<>>", + "CUDNN_INCLUDE_PATH": "<<>>", + "CUDNN_LIBRARY_PATH": "<<>>", + "TORCH_CUDA_ARCH_LIST": "Ampere Ada Hopper", + "TORCH_CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=1", + "USE_CUDA": "1", + "USE_CUDNN": "1" + } + }, + "repo.#": { + "env": { + "CM_GIT_URL": "#" + }, + "group": "repo" + }, + "repo.https://github.com/pytorch/kineto": { + "default": true, + "env": { + "CM_GIT_URL": "https://github.com/pytorch/kineto" + }, + "group": "repo" + }, + "sha.#": { + "env": { + "CM_GIT_CHECKOUT_SHA": "#" + } + }, + "tag.#": { + "ad": { + "pytorch-src-repo": { + "tags": "_full-history" + } + }, + "env": { + "CM_GIT_CHECKOUT_TAG": "#" + } + } + }, + "versions": {} +} diff --git a/cm-mlops/script/install-pytorch-kineto-from-src/customize.py b/cm-mlops/script/install-pytorch-kineto-from-src/customize.py new file mode 100644 index 0000000000..df2744ac4d --- /dev/null +++ b/cm-mlops/script/install-pytorch-kineto-from-src/customize.py @@ -0,0 +1,17 @@ +from cmind import utils +import os + +def preprocess(i): + + os_info = i['os_info'] + + if os_info['platform'] == 'windows': + return {'return':1, 'error': 'Windows is not supported in this script yet'} + + env = i['env'] + + automation = i['automation'] + + recursion_spaces = i['recursion_spaces'] + + return {'return':0} diff --git a/cm-mlops/script/install-pytorch-kineto-from-src/run.sh b/cm-mlops/script/install-pytorch-kineto-from-src/run.sh new file mode 100644 index 0000000000..bd162e7f8e --- /dev/null +++ b/cm-mlops/script/install-pytorch-kineto-from-src/run.sh @@ -0,0 +1,15 @@ +#!/bin/bash + +CUR_DIR=$PWD +rm -rf kineto +cp -r ${CM_PYTORCH_KINETO_SRC_REPO_PATH} kineto +cd kineto +rm -rf libkineto/build + +mkdir -p libkneto/build && cd libkineto/build +cmake .. +test $? -eq 0 || exit $? +make +test $? -eq 0 || exit $? 
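+# Install the built libkineto into the default CMake prefix (typically /usr/local), hence the sudo below.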
+sudo make install +test $? -eq 0 || exit $? diff --git a/cm-mlops/script/install-tflite-from-src/README.md b/cm-mlops/script/install-tflite-from-src/README.md index ba198aaf30..7a27749c86 100644 --- a/cm-mlops/script/install-tflite-from-src/README.md +++ b/cm-mlops/script/install-tflite-from-src/README.md @@ -117,9 +117,9 @@ ___ - CM script: [detect-cpu](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/detect-cpu) * get,compiler * CM names: `--adr.['compiler']...` + - CM script: [get-llvm](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/get-llvm) - CM script: [get-cl](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/get-cl) - CM script: [get-gcc](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/get-gcc) - - CM script: [get-llvm](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/get-llvm) * get,cmake - CM script: [get-cmake](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/get-cmake) 1. ***Run "preprocess" function from [customize.py](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/install-tflite-from-src/customize.py)*** diff --git a/cm-mlops/script/launch-benchmark/README-extra.md b/cm-mlops/script/launch-benchmark/README-extra.md new file mode 100644 index 0000000000..3854e8ecb1 --- /dev/null +++ b/cm-mlops/script/launch-benchmark/README-extra.md @@ -0,0 +1,3 @@ +# CM script + +Universal benchmark launcher via Collective Mind diff --git a/cm-mlops/script/launch-benchmark/_cm.yaml b/cm-mlops/script/launch-benchmark/_cm.yaml new file mode 100644 index 0000000000..f45606bc24 --- /dev/null +++ b/cm-mlops/script/launch-benchmark/_cm.yaml @@ -0,0 +1,15 @@ +alias: launch-benchmark +uid: 5dc7662804bc4cad + +automation_alias: script +automation_uid: 5b4e0237da074764 + +tags: +- launch +- benchmark + +category: "Collective benchmarking" + +gui: + title: "Launch benchmark" + use_customize_func: "gui" diff --git a/cm-mlops/script/launch-benchmark/customize.py b/cm-mlops/script/launch-benchmark/customize.py new file mode 100644 index 0000000000..a6df0cf50a --- /dev/null +++ b/cm-mlops/script/launch-benchmark/customize.py @@ -0,0 +1,145 @@ +import cmind +import os + +################################################################################## +def preprocess(i): + + os_info = i['os_info'] + + env = i['env'] + + meta = i['meta'] + + automation = i['automation'] + + quiet = (env.get('CM_QUIET', False) == 'yes') + + return {'return':0} + +################################################################################## +def postprocess(i): + + env = i['env'] + + return {'return':0} + +################################################################################## +def gui(i): + + st = i['streamlit_module'] + meta = i['meta'] + gui_meta = meta['gui'] + + title = gui_meta['title'] + + # Title + st.title('[Collective Mind](https://github.com/mlcommons/ck)') + + st.markdown('### {}'.format(title)) + + + + + # Check compute + r=load_cfg({'tags':'benchmark,compute'}) + if r['return']>0: return r + + selection = r['selection'] + selection_desc = r['selection_desc'] + + # Creating compute selector + compute = st.selectbox('Select target hardware:', + range(len(selection_desc)), + format_func=lambda x: selection_desc[x], + index = 0, + key = 'compute') + + + + + return {'return':0} + +################################################################################## +def load_cfg(i): + + tags = i['tags'] + + key = i.get('key','') + if key == '': key = 'cfg-' + + ii={'action':'find', + 'automation':'cfg', + 'tags':tags} + + 
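+    # 'find' returns the CM artifacts of the 'cfg' automation matching these tags;
+    # the matching entries are exposed as r['list'] and scanned below.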
r=cmind.access(ii) + if r['return']>0: return r + + lst = r['list'] + + # Checking individual files inside CM entry + metas = [] + + selection = [''] + selection_desc = [''] + + for l in lst: + path = l.path + + files = os.listdir(path) + + for f in files: + if not f.endswith('.json') and not f.endswith('.yaml'): + continue + + if key!='' and not f.startswith(key): + continue + + full_path = os.path.join(path, f) + + full_path_without_ext = full_path[:-5] + + r = cmind.utils.load_yaml_and_json(full_path_without_ext) + if r['return']>0: + print ('Warning: problem loading file {}'.format(full_path)) + else: + meta = r['meta'] + + aux_tags = meta.get('tags',[]) + + aux_tags_string = ','.join(aux_tags) + + aux_tags_print = '' + for t in aux_tags: + if aux_tags_print!='': + aux_tags_print+=' • ' + + # Beautify + if t == 'cpu': t = 'CPU' + elif t == 'gpu': t = 'GPU' + elif t == 'tpu': t = 'TPU' + elif t == 'ai 100': t = 'AI 100' + elif t == 'amd': t = 'AMD' + elif t == 'x64': t = 'x64' + else: + t = t.capitalize() + + aux_tags_print += t + + uid = meta['uid'] + + dd = {'full_path': full_path, + 'uid': uid, + 'tags': aux_tags_string, + 'tags_print': aux_tags_print} + + selection.append(uid) + selection_desc.append(aux_tags_print) + + metas.append(dd) + + # Prepare selector + + + + + return {'return':0, 'lst':lst, 'all_meta':metas, 'selection':selection, 'selection_desc':selection_desc} diff --git a/cm-mlops/script/launch-benchmark/tests/debug.py b/cm-mlops/script/launch-benchmark/tests/debug.py new file mode 100644 index 0000000000..842003b2c6 --- /dev/null +++ b/cm-mlops/script/launch-benchmark/tests/debug.py @@ -0,0 +1,6 @@ +import cmind + +r=cmind.access({'action':'gui', + 'automation':'script', + 'artifact':'launch benchmark'}) +print (r) diff --git a/cm-mlops/script/process-mlperf-accuracy/customize.py b/cm-mlops/script/process-mlperf-accuracy/customize.py index 5b568659a5..b7cdc7043c 100644 --- a/cm-mlops/script/process-mlperf-accuracy/customize.py +++ b/cm-mlops/script/process-mlperf-accuracy/customize.py @@ -54,7 +54,7 @@ def preprocess(i): elif dataset == "cnndm": CMD = env['CM_PYTHON_BIN_WITH_PATH'] + " '" + os.path.join(env['CM_MLPERF_INFERENCE_SOURCE'], "language", "gpt-j", "evaluation.py") + "' --mlperf-accuracy-file '" + os.path.join(result_dir, "mlperf_log_accuracy.json") + \ - "' --dataset-file '" + env['CM_DATASET_EVAL_PATH'] + "' > '" + out_file + "'" + "' --dataset-file '" + env['CM_DATASET_EVAL_PATH'] + "'"+ " --dtype " + env.get('CM_ACCURACY_DTYPE', "float32") +" > '" + out_file + "'" elif dataset == "coco2014": diff --git a/cm-mlops/script/reproduce-mlperf-inference-nvidia/README.md b/cm-mlops/script/reproduce-mlperf-inference-nvidia/README.md index 009dcb8f16..683c053067 100644 --- a/cm-mlops/script/reproduce-mlperf-inference-nvidia/README.md +++ b/cm-mlops/script/reproduce-mlperf-inference-nvidia/README.md @@ -270,6 +270,17 @@ ___ - CM script: [get-generic-python-lib](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/get-generic-python-lib) * `_gptj_` - Workflow: + 1. ***Read "deps" on other CM scripts*** + * get,generic-python-lib,_package.datasets + - CM script: [get-generic-python-lib](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/get-generic-python-lib) + * get,generic-python-lib,_package.simplejson + - CM script: [get-generic-python-lib](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/get-generic-python-lib) + * `_preprocess_data,gptj_` + - Workflow: + 1. 
***Read "deps" on other CM scripts*** + * get,ml-model,gptj,_pytorch,_rclone + * CM names: `--adr.['gptj-model']...` + - CM script: [get-ml-model-gptj](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/get-ml-model-gptj) @@ -317,25 +328,30 @@ ___ - *CM_MLPERF_NVIDIA_HARNESS_DLA_BATCH_SIZE*: `#` - *CM_MLPERF_SUT_NAME_RUN_CONFIG_SUFFIX2*: `dla_batch_size.#` - Workflow: - * `_gptj_,_build` + * `_gptj_,build` - Workflow: 1. ***Read "deps" on other CM scripts*** - * install,torch,from.src,_for-nvidia-mlperf-inference-v3.1-gptj - - *Warning: no scripts found* + * install,pytorch,from.src,_for-nvidia-mlperf-inference-v3.1-gptj + - CM script: [install-pytorch-from-src](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/install-pytorch-from-src) * get,cmake - CM script: [get-cmake](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/get-cmake) - * `_gptj_,_build_engine` + * `_gptj_,build_engine` - Workflow: 1. ***Read "deps" on other CM scripts*** - * install,torch,from.src,_for-nvidia-mlperf-inference-v3.1-gptj - - *Warning: no scripts found* + * install,pytorch,from.src,_for-nvidia-mlperf-inference-v3.1-gptj + - CM script: [install-pytorch-from-src](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/install-pytorch-from-src) * get,cmake - CM script: [get-cmake](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/get-cmake) - * `_gptj_,_run_harness` + * `_gptj_,run_harness` + - Environment variables: + - *CM_MLPERF_NVIDIA_HARNESS_USE_FP8*: `True` + - *CM_MLPERF_NVIDIA_HARNESS_ENABLE_SORT*: `True` + - *CM_MLPERF_NVIDIA_HARNESS_NUM_SORT_SEGMENTS*: `2` + - *CM_MLPERF_NVIDIA_HARNESS_SKIP_POSTPROCESS*: `True` - Workflow: 1. ***Read "deps" on other CM scripts*** - * install,torch,from.src,_for-nvidia-mlperf-inference-v3.1-gptj - - *Warning: no scripts found* + * install,pytorch,from.src,_for-nvidia-mlperf-inference-v3.1-gptj + - CM script: [install-pytorch-from-src](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/install-pytorch-from-src) * get,cmake - CM script: [get-cmake](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/get-cmake) * `_gpu_memory.16,3d-unet_,offline,run_harness` @@ -353,6 +369,11 @@ ___ - *CM_MODEL_BATCH_SIZE*: `1400` - *CM_MLPERF_NVIDIA_HARNESS_GPU_BATCH_SIZE*: `<<>>` - Workflow: + * `_gpu_memory.16,gptj_,offline,run_harness` + - Environment variables: + - *CM_MODEL_BATCH_SIZE*: `2` + - *CM_MLPERF_NVIDIA_HARNESS_GPU_BATCH_SIZE*: `<<>>` + - Workflow: * `_gpu_memory.16,resnet50,offline,run_harness` - Environment variables: - *CM_MODEL_BATCH_SIZE*: `1024` @@ -384,6 +405,11 @@ ___ - *CM_MODEL_BATCH_SIZE*: `1400` - *CM_MLPERF_NVIDIA_HARNESS_GPU_BATCH_SIZE*: `<<>>` - Workflow: + * `_gpu_memory.24,gptj_,offline,run_harness` + - Environment variables: + - *CM_MODEL_BATCH_SIZE*: `2` + - *CM_MLPERF_NVIDIA_HARNESS_GPU_BATCH_SIZE*: `<<>>` + - Workflow: * `_gpu_memory.24,resnet50,offline,run_harness` - Environment variables: - *CM_MODEL_BATCH_SIZE*: `64` @@ -416,6 +442,11 @@ ___ - *CM_MODEL_BATCH_SIZE*: `1400` - *CM_MLPERF_NVIDIA_HARNESS_GPU_BATCH_SIZE*: `<<>>` - Workflow: + * `_gpu_memory.32,gptj_,offline,run_harness` + - Environment variables: + - *CM_MODEL_BATCH_SIZE*: `3` + - *CM_MLPERF_NVIDIA_HARNESS_GPU_BATCH_SIZE*: `<<>>` + - Workflow: * `_gpu_memory.32,resnet50,offline,run_harness` - Environment variables: - *CM_MODEL_BATCH_SIZE*: `2048` @@ -446,6 +477,11 @@ ___ - *CM_MODEL_BATCH_SIZE*: `1400` - *CM_MLPERF_NVIDIA_HARNESS_GPU_BATCH_SIZE*: `<<>>` - Workflow: + * `_gpu_memory.40,gptj_,offline,run_harness` + - Environment 
variables: + - *CM_MODEL_BATCH_SIZE*: `4` + - *CM_MLPERF_NVIDIA_HARNESS_GPU_BATCH_SIZE*: `<<>>` + - Workflow: * `_gpu_memory.40,resnet50,offline,run_harness` - Environment variables: - *CM_MODEL_BATCH_SIZE*: `2048` @@ -476,6 +512,11 @@ ___ - *CM_MODEL_BATCH_SIZE*: `1400` - *CM_MLPERF_NVIDIA_HARNESS_GPU_BATCH_SIZE*: `<<>>` - Workflow: + * `_gpu_memory.48,gptj_,offline,run_harness` + - Environment variables: + - *CM_MODEL_BATCH_SIZE*: `4` + - *CM_MLPERF_NVIDIA_HARNESS_GPU_BATCH_SIZE*: `<<>>` + - Workflow: * `_gpu_memory.48,resnet50,offline,run_harness` - Environment variables: - *CM_MODEL_BATCH_SIZE*: `2048` @@ -506,6 +547,11 @@ ___ - *CM_MODEL_BATCH_SIZE*: `1400` - *CM_MLPERF_NVIDIA_HARNESS_GPU_BATCH_SIZE*: `<<>>` - Workflow: + * `_gpu_memory.80,gptj_,offline,run_harness` + - Environment variables: + - *CM_MODEL_BATCH_SIZE*: `7` + - *CM_MLPERF_NVIDIA_HARNESS_GPU_BATCH_SIZE*: `<<>>` + - Workflow: * `_gpu_memory.80,resnet50,offline,run_harness` - Environment variables: - *CM_MODEL_BATCH_SIZE*: `2048` @@ -1315,6 +1361,7 @@ ___ * `--dla_batch_size=value` → `CM_MLPERF_NVIDIA_HARNESS_DLA_BATCH_SIZE=value` * `--dla_copy_streams=value` → `CM_MLPERF_NVIDIA_HARNESS_DLA_COPY_STREAMS=value` * `--dla_inference_streams=value` → `CM_MLPERF_NVIDIA_HARNESS_DLA_INFERENCE_STREAMS=value` +* `--enable_sort=value` → `CM_MLPERF_NVIDIA_HARNESS_ENABLE_SORT=value` * `--end_on_device=value` → `CM_MLPERF_NVIDIA_HARNESS_END_ON_DEVICE=value` * `--extra_run_options=value` → `CM_MLPERF_NVIDIA_HARNESS_EXTRA_RUN_OPTIONS=value` * `--gpu_batch_size=value` → `CM_MLPERF_NVIDIA_HARNESS_GPU_BATCH_SIZE=value` @@ -1330,6 +1377,7 @@ ___ * `--mode=value` → `CM_MLPERF_LOADGEN_MODE=value` * `--multistream_target_latency=value` → `CM_MLPERF_LOADGEN_MULTISTREAM_TARGET_LATENCY=value` * `--num_issue_query_threads=value` → `CM_MLPERF_NVIDIA_HARNESS_NUM_ISSUE_QUERY_THREADS=value` +* `--num_sort_segments=value` → `CM_MLPERF_NVIDIA_HARNESS_NUM_SORT_SEGMENTS=value` * `--num_warmups=value` → `CM_MLPERF_NVIDIA_HARNESS_NUM_WARMUPS=value` * `--offline_target_qps=value` → `CM_MLPERF_LOADGEN_OFFLINE_TARGET_QPS=value` * `--output_dir=value` → `CM_MLPERF_OUTPUT_DIR=value` @@ -1340,6 +1388,7 @@ ___ * `--scenario=value` → `CM_MLPERF_LOADGEN_SCENARIO=value` * `--server_target_qps=value` → `CM_MLPERF_LOADGEN_SERVER_TARGET_QPS=value` * `--singlestream_target_latency=value` → `CM_MLPERF_LOADGEN_SINGLESTREAM_TARGET_LATENCY=value` +* `--skip_postprocess=value` → `CM_MLPERF_NVIDIA_HARNESS_SKIP_POSTPROCESS=value` * `--skip_preprocess=value` → `CM_SKIP_PREPROCESS_DATASET=value` * `--skip_preprocessing=value` → `CM_SKIP_PREPROCESS_DATASET=value` * `--soft_drop=value` → `CM_MLPERF_NVIDIA_HARNESS_SOFT_DROP=value` @@ -1348,6 +1397,7 @@ ___ * `--target_qps=value` → `CM_MLPERF_LOADGEN_TARGET_QPS=value` * `--use_cuda_thread_per_device=value` → `CM_MLPERF_NVIDIA_HARNESS_USE_CUDA_THREAD_PER_DEVICE=value` * `--use_deque_limit=value` → `CM_MLPERF_NVIDIA_HARNESS_USE_DEQUE_LIMIT=value` +* `--use_fp8=value` → `CM_MLPERF_NVIDIA_HARNESS_USE_FP8=value` * `--use_graphs=value` → `CM_MLPERF_NVIDIA_HARNESS_USE_GRAPHS=value` * `--use_small_tile_gemm_plugin=value` → `CM_MLPERF_NVIDIA_HARNESS_USE_SMALL_TILE_GEMM_PLUGIN=value` * `--use_triton=value` → `CM_MLPERF_NVIDIA_HARNESS_USE_TRITON=value` @@ -1448,7 +1498,7 @@ ___ * `if (CM_MODEL in ['gptj-99', 'gptj-99.9'] AND CM_MLPERF_NVIDIA_HARNESS_RUN_MODE == preprocess_dataset)` * CM names: `--adr.['openorca-original']...` - CM script: [get-dataset-openorca](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/get-dataset-openorca) - * 
get,ml-model,gptj,_pytorch + * get,ml-model,gptj,_pytorch,_rclone * `if (CM_MODEL in ['gptj-99', 'gptj-99.9'] AND CM_MLPERF_NVIDIA_HARNESS_RUN_MODE == download_model)` * CM names: `--adr.['gptj-model']...` - CM script: [get-ml-model-gptj](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/get-ml-model-gptj) diff --git a/cm-mlops/script/reproduce-mlperf-inference-nvidia/_cm.yaml b/cm-mlops/script/reproduce-mlperf-inference-nvidia/_cm.yaml index 4090c76a18..854e3c5e31 100644 --- a/cm-mlops/script/reproduce-mlperf-inference-nvidia/_cm.yaml +++ b/cm-mlops/script/reproduce-mlperf-inference-nvidia/_cm.yaml @@ -83,6 +83,10 @@ input_mapping: soft_drop: CM_MLPERF_NVIDIA_HARNESS_SOFT_DROP use_small_tile_gemm_plugin: CM_MLPERF_NVIDIA_HARNESS_USE_SMALL_TILE_GEMM_PLUGIN audio_buffer_num_lines: CM_MLPERF_NVIDIA_HARNESS_AUDIO_BUFFER_NUM_LINES + use_fp8: CM_MLPERF_NVIDIA_HARNESS_USE_FP8 + enable_sort: CM_MLPERF_NVIDIA_HARNESS_ENABLE_SORT + num_sort_segments: CM_MLPERF_NVIDIA_HARNESS_NUM_SORT_SEGMENTS + skip_postprocess: CM_MLPERF_NVIDIA_HARNESS_SKIP_POSTPROCESS new_state_keys: - mlperf-inference-implementation @@ -231,17 +235,6 @@ deps: - openorca-original tags: get,dataset,original,openorca - ######################################################################## - # Install GPTJ-6B model - - enable_if_env: - CM_MODEL: - - gptj-99 - - gptj-99.9 - CM_MLPERF_NVIDIA_HARNESS_RUN_MODE: - - download_model - names: - - gptj-model - tags: get,ml-model,gptj,_pytorch ######################################################################## # Install MLPerf inference dependencies @@ -264,6 +257,18 @@ deps: CM_MLPERF_NVIDIA_HARNESS_RUN_MODE: - run_harness +prehook_deps: + ######################################################################## + # Install GPTJ-6B model + - enable_if_env: + CM_REQUIRE_GPTJ_MODEL_DOWNLOAD: + - 'yes' + CM_MLPERF_NVIDIA_HARNESS_RUN_MODE: + - download_model + - preprocess_data + names: + - gptj-model + tags: get,ml-model,gptj,_pytorch,_rclone # Post dependencies to run this app including for power measurement post_deps: @@ -415,22 +420,20 @@ variations: env: CM_MODEL: dlrm-v2-99.9 - gptj_: {} - gptj_,_build: + gptj_: deps: - - tags: install,torch,from.src,_for-nvidia-mlperf-inference-v3.1-gptj - - tags: get,cmake - version_min: "3.25.0" - - gptj_,_build_engine: + - tags: get,generic-python-lib,_package.datasets + - tags: get,generic-python-lib,_package.simplejson + + gptj_,build: deps: - - tags: install,torch,from.src,_for-nvidia-mlperf-inference-v3.1-gptj + - tags: install,pytorch,from.src,_for-nvidia-mlperf-inference-v3.1-gptj - tags: get,cmake version_min: "3.25.0" - gptj_,_run_harness: + gptj_,build_engine: deps: - - tags: install,torch,from.src,_for-nvidia-mlperf-inference-v3.1-gptj + - tags: install,pytorch,from.src,_for-nvidia-mlperf-inference-v3.1-gptj - tags: get,cmake version_min: "3.25.0" @@ -454,11 +457,18 @@ variations: CM_MODEL_BATCH_SIZE: "#" CM_MLPERF_NVIDIA_HARNESS_GPU_BATCH_SIZE: "#" #CM_MLPERF_SUT_NAME_RUN_CONFIG_SUFFIX1: "gpu_batch_size.#" + adr: + build-engine: + tags: _batch_size.# dla_batch_size.#: + group: dla-batch-size env: CM_MLPERF_NVIDIA_HARNESS_DLA_BATCH_SIZE: "#" CM_MLPERF_SUT_NAME_RUN_CONFIG_SUFFIX2: "dla_batch_size.#" + adr: + build-engine: + tags: _dla_batch_size.# use_triton: group: triton @@ -793,337 +803,211 @@ variations: SKIP_POLICIES: '1' singlestream,run_harness: - add_deps_recursive: - build-engine: - tags: _batch_size.1 + default_variations: + batch-size: batch_size.1 + + gptj_,run_harness: + deps: + - tags: 
install,pytorch,from.src,_for-nvidia-mlperf-inference-v3.1-gptj + - tags: get,cmake + version_min: "3.25.0" env: - CM_MODEL_BATCH_SIZE: "1" - CM_MLPERF_NVIDIA_HARNESS_GPU_BATCH_SIZE: "<<>>" + CM_MLPERF_NVIDIA_HARNESS_USE_FP8: 'True' + CM_MLPERF_NVIDIA_HARNESS_ENABLE_SORT: 'True' + CM_MLPERF_NVIDIA_HARNESS_NUM_SORT_SEGMENTS: '2' + CM_MLPERF_NVIDIA_HARNESS_SKIP_POSTPROCESS: True + + gpu_memory.16,gptj_,offline,run_harness: + default_variations: + batch-size: batch_size.4 + + gpu_memory.24,gptj_,offline,run_harness: + default_variations: + batch-size: batch_size.7 + + gpu_memory.32,gptj_,offline,run_harness: + default_variations: + batch-size: batch_size.8 + + gpu_memory.48,gptj_,offline,run_harness: + default_variations: + batch-size: batch_size.14 + + gpu_memory.40,gptj_,offline,run_harness: + default_variations: + batch-size: batch_size.10 + + gpu_memory.80,gptj_,offline,run_harness: + default_variations: + batch-size: batch_size.32 gpu_memory.16,bert_,offline,run_harness: - add_deps_recursive: - build-engine: - tags: _batch_size.256 - env: - CM_MODEL_BATCH_SIZE: "256" - CM_MLPERF_NVIDIA_HARNESS_GPU_BATCH_SIZE: "<<>>" + default_variations: + batch-size: batch_size.256 gpu_memory.24,bert_,offline,run_harness: - add_deps_recursive: - build-engine: - tags: _batch_size.256 - env: - CM_MODEL_BATCH_SIZE: "256" - CM_MLPERF_NVIDIA_HARNESS_GPU_BATCH_SIZE: "<<>>" + default_variations: + batch-size: batch_size.256 gpu_memory.32,bert_,offline,run_harness: - add_deps_recursive: - build-engine: - tags: _batch_size.256 - env: - CM_MODEL_BATCH_SIZE: "256" - CM_MLPERF_NVIDIA_HARNESS_GPU_BATCH_SIZE: "<<>>" + default_variations: + batch-size: batch_size.256 gpu_memory.48,bert_,offline,run_harness: - add_deps_recursive: - build-engine: - tags: _batch_size.1024 - env: - CM_MODEL_BATCH_SIZE: "1024" - CM_MLPERF_NVIDIA_HARNESS_GPU_BATCH_SIZE: "<<>>" + default_variations: + batch-size: batch_size.1024 gpu_memory.40,bert_,offline,run_harness: - add_deps_recursive: - build-engine: - tags: _batch_size.256 - env: - CM_MODEL_BATCH_SIZE: "256" - CM_MLPERF_NVIDIA_HARNESS_GPU_BATCH_SIZE: "<<>>" + default_variations: + batch-size: batch_size.256 gpu_memory.80,bert_,server,run_harness: - add_deps_recursive: - build-engine: - tags: _batch_size.64 - env: - CM_MODEL_BATCH_SIZE: "64" - CM_MLPERF_NVIDIA_HARNESS_GPU_BATCH_SIZE: "<<>>" + default_variations: + batch-size: batch_size.64 gpu_memory.16,resnet50,offline,run_harness: - add_deps_recursive: - build-engine: - tags: _batch_size.1024 + default_variations: + batch-size: batch_size.1024 env: - CM_MODEL_BATCH_SIZE: "1024" - CM_MLPERF_NVIDIA_HARNESS_GPU_BATCH_SIZE: "<<>>" CM_MLPERF_NVIDIA_HARNESS_GPU_COPY_STREAMS: "4" gpu_memory.40,resnet50,offline,run_harness: - add_deps_recursive: - build-engine: - tags: _batch_size.2048 - env: - CM_MODEL_BATCH_SIZE: "2048" - CM_MLPERF_NVIDIA_HARNESS_GPU_BATCH_SIZE: "<<>>" + default_variations: + batch-size: batch_size.2048 gpu_memory.24,resnet50,offline,run_harness: - add_deps_recursive: - build-engine: - tags: _batch_size.64 - env: - CM_MODEL_BATCH_SIZE: "64" - CM_MLPERF_NVIDIA_HARNESS_GPU_BATCH_SIZE: "<<>>" + default_variations: + batch-size: batch_size.64 gpu_memory.32,resnet50,offline,run_harness: - add_deps_recursive: - build-engine: - tags: _batch_size.2048 - env: - CM_MODEL_BATCH_SIZE: "2048" - CM_MLPERF_NVIDIA_HARNESS_GPU_BATCH_SIZE: "<<>>" + default_variations: + batch-size: batch_size.2048 gpu_memory.48,resnet50,offline,run_harness: - add_deps_recursive: - build-engine: - tags: _batch_size.2048 - env: - CM_MODEL_BATCH_SIZE: "2048" - 
CM_MLPERF_NVIDIA_HARNESS_GPU_BATCH_SIZE: "<<>>" + default_variations: + batch-size: batch_size.2048 gpu_memory.80,resnet50,offline,run_harness: - add_deps_recursive: - build-engine: - tags: _batch_size.2048 - env: - CM_MODEL_BATCH_SIZE: "2048" - CM_MLPERF_NVIDIA_HARNESS_GPU_BATCH_SIZE: "<<>>" + default_variations: + batch-size: batch_size.2048 resnet50,server,run_harness: - add_deps_recursive: - build-engine: - tags: _batch_size.64 - env: - CM_MODEL_BATCH_SIZE: "64" - CM_MLPERF_NVIDIA_HARNESS_GPU_BATCH_SIZE: "<<>>" + default_variations: + batch-size: batch_size.64 resnet50,multistream,run_harness: - add_deps_recursive: - build-engine: - tags: _batch_size.8 - env: - CM_MODEL_BATCH_SIZE: "8" - CM_MLPERF_NVIDIA_HARNESS_GPU_BATCH_SIZE: "<<>>" + default_variations: + batch-size: batch_size.8 retinanet,multistream,run_harness: - add_deps_recursive: - build-engine: - tags: _batch_size.2 - env: - CM_MODEL_BATCH_SIZE: "2" - CM_MLPERF_NVIDIA_HARNESS_GPU_BATCH_SIZE: "<<>>" + default_variations: + batch-size: batch_size.2 gpu_memory.16,retinanet,offline,run_harness: - add_deps_recursive: - build-engine: - tags: _batch_size.2 - env: - CM_MODEL_BATCH_SIZE: "2" - CM_MLPERF_NVIDIA_HARNESS_GPU_BATCH_SIZE: "<<>>" + default_variations: + batch-size: batch_size.2 gpu_memory.40,retinanet,offline,run_harness: - add_deps_recursive: - build-engine: - tags: _batch_size.4 - env: - CM_MODEL_BATCH_SIZE: "4" - CM_MLPERF_NVIDIA_HARNESS_GPU_BATCH_SIZE: "<<>>" + default_variations: + batch-size: batch_size.4 gpu_memory.32,retinanet,offline,run_harness: - add_deps_recursive: - build-engine: - tags: _batch_size.4 - env: - CM_MODEL_BATCH_SIZE: "4" - CM_MLPERF_NVIDIA_HARNESS_GPU_BATCH_SIZE: "<<>>" + default_variations: + batch-size: batch_size.4 gpu_memory.48,retinanet,offline,run_harness: - add_deps_recursive: - build-engine: - tags: _batch_size.4 - env: - CM_MODEL_BATCH_SIZE: "4" - CM_MLPERF_NVIDIA_HARNESS_GPU_BATCH_SIZE: "<<>>" + default_variations: + batch-size: batch_size.4 gpu_memory.24,retinanet,offline,run_harness: - add_deps_recursive: - build-engine: - tags: _batch_size.2 + default_variations: + batch-size: batch_size.2 env: - CM_MODEL_BATCH_SIZE: "2" CM_MLPERF_NVIDIA_HARNESS_GPU_COPY_STREAMS: "2" CM_MLPERF_NVIDIA_HARNESS_GPU_INFERENCE_STREAMS: "2" - CM_MLPERF_NVIDIA_HARNESS_GPU_BATCH_SIZE: "<<>>" gpu_memory.80,retinanet,offline,run_harness: - add_deps_recursive: - build-engine: - tags: _batch_size.8 - env: - CM_MODEL_BATCH_SIZE: "8" - CM_MLPERF_NVIDIA_HARNESS_GPU_BATCH_SIZE: "<<>>" + default_variations: + batch-size: batch_size.8 retinanet,server,run_harness: - add_deps_recursive: - build-engine: - tags: _batch_size.8 + default_variations: + batch-size: batch_size.8 env: - CM_MODEL_BATCH_SIZE: "8" CM_MLPERF_NVIDIA_HARNESS_GPU_COPY_STREAMS: "2" CM_MLPERF_NVIDIA_HARNESS_GPU_INFERENCE_STREAMS: "2" - CM_MLPERF_NVIDIA_HARNESS_GPU_BATCH_SIZE: "<<>>" gpu_memory.16,rnnt,offline,run_harness: - add_deps_recursive: - build-engine: - tags: _batch_size.1024 - env: - CM_MODEL_BATCH_SIZE: "1024" - CM_MLPERF_NVIDIA_HARNESS_GPU_BATCH_SIZE: "<<>>" + default_variations: + batch-size: batch_size.1024 gpu_memory.40,rnnt,offline,run_harness: - add_deps_recursive: - build-engine: - tags: _batch_size.2048 - env: - CM_MODEL_BATCH_SIZE: "2048" - CM_MLPERF_NVIDIA_HARNESS_GPU_BATCH_SIZE: "<<>>" + default_variations: + batch-size: batch_size.2048 gpu_memory.24,rnnt,offline,run_harness: - add_deps_recursive: - build-engine: - tags: _batch_size.2048 - env: - CM_MODEL_BATCH_SIZE: "2048" - CM_MLPERF_NVIDIA_HARNESS_GPU_BATCH_SIZE: "<<>>" + 
default_variations: + batch-size: batch_size.2048 gpu_memory.32,rnnt,offline,run_harness: - add_deps_recursive: - build-engine: - tags: _batch_size.2048 - env: - CM_MODEL_BATCH_SIZE: "2048" - CM_MLPERF_NVIDIA_HARNESS_GPU_BATCH_SIZE: "<<>>" + default_variations: + batch-size: batch_size.2048 gpu_memory.48,rnnt,offline,run_harness: - add_deps_recursive: - build-engine: - tags: _batch_size.2048 - env: - CM_MODEL_BATCH_SIZE: "2048" - CM_MLPERF_NVIDIA_HARNESS_GPU_BATCH_SIZE: "<<>>" + default_variations: + batch-size: batch_size.2048 gpu_memory.80,rnnt,offline,run_harness: - add_deps_recursive: - build-engine: - tags: _batch_size.2048 - env: - CM_MODEL_BATCH_SIZE: "2048" - CM_MLPERF_NVIDIA_HARNESS_GPU_BATCH_SIZE: "<<>>" + default_variations: + batch-size: batch_size.2048 gpu_memory.16,3d-unet_,offline,run_harness: - add_deps_recursive: - build-engine: - tags: _batch_size.4 - env: - CM_MODEL_BATCH_SIZE: "4" - CM_MLPERF_NVIDIA_HARNESS_GPU_BATCH_SIZE: "<<>>" + default_variations: + batch-size: batch_size.4 gpu_memory.40,3d-unet_,offline,run_harness: - add_deps_recursive: - build-engine: - tags: _batch_size.8 - env: - CM_MODEL_BATCH_SIZE: "8" - CM_MLPERF_NVIDIA_HARNESS_GPU_BATCH_SIZE: "<<>>" + default_variations: + batch-size: batch_size.8 gpu_memory.24,3d-unet_,offline,run_harness: - add_deps_recursive: - build-engine: - tags: _batch_size.8 - env: - CM_MODEL_BATCH_SIZE: "8" - CM_MLPERF_NVIDIA_HARNESS_GPU_BATCH_SIZE: "<<>>" + default_variations: + batch-size: batch_size.8 gpu_memory.80,3d-unet_,offline,run_harness: - add_deps_recursive: - build-engine: - tags: _batch_size.8 - env: - CM_MODEL_BATCH_SIZE: "8" - CM_MLPERF_NVIDIA_HARNESS_GPU_BATCH_SIZE: "<<>>" + default_variations: + batch-size: batch_size.8 gpu_memory.32,3d-unet_,offline,run_harness: - add_deps_recursive: - build-engine: - tags: _batch_size.8 - env: - CM_MODEL_BATCH_SIZE: "8" - CM_MLPERF_NVIDIA_HARNESS_GPU_BATCH_SIZE: "<<>>" + default_variations: + batch-size: batch_size.8 gpu_memory.48,3d-unet_,offline,run_harness: - add_deps_recursive: - build-engine: - tags: _batch_size.8 - env: - CM_MODEL_BATCH_SIZE: "8" - CM_MLPERF_NVIDIA_HARNESS_GPU_BATCH_SIZE: "<<>>" + default_variations: + batch-size: batch_size.8 gpu_memory.16,dlrm_,offline,run_harness: - add_deps_recursive: - build-engine: - tags: _batch_size.1400 - env: - CM_MODEL_BATCH_SIZE: "1400" - CM_MLPERF_NVIDIA_HARNESS_GPU_BATCH_SIZE: "<<>>" + default_variations: + batch-size: batch_size.1400 gpu_memory.40,dlrm_,offline,run_harness: - add_deps_recursive: - build-engine: - tags: _batch_size.1400 - env: - CM_MODEL_BATCH_SIZE: "1400" - CM_MLPERF_NVIDIA_HARNESS_GPU_BATCH_SIZE: "<<>>" + default_variations: + batch-size: batch_size.1400 gpu_memory.24,dlrm_,offline,run_harness: - add_deps_recursive: - build-engine: - tags: _batch_size.1400 - env: - CM_MODEL_BATCH_SIZE: "1400" - CM_MLPERF_NVIDIA_HARNESS_GPU_BATCH_SIZE: "<<>>" + default_variations: + batch-size: batch_size.1400 gpu_memory.32,dlrm_,offline,run_harness: - add_deps_recursive: - build-engine: - tags: _batch_size.1400 - env: - CM_MODEL_BATCH_SIZE: "1400" - CM_MLPERF_NVIDIA_HARNESS_GPU_BATCH_SIZE: "<<>>" + default_variations: + batch-size: batch_size.1400 gpu_memory.48,dlrm_,offline,run_harness: - add_deps_recursive: - build-engine: - tags: _batch_size.1400 - env: - CM_MODEL_BATCH_SIZE: "1400" - CM_MLPERF_NVIDIA_HARNESS_GPU_BATCH_SIZE: "<<>>" + default_variations: + batch-size: batch_size.1400 gpu_memory.80,dlrm_,offline,run_harness: - add_deps_recursive: - build-engine: - tags: _batch_size.1400 - env: - CM_MODEL_BATCH_SIZE: "1400" 
- CM_MLPERF_NVIDIA_HARNESS_GPU_BATCH_SIZE: "<<>>" + default_variations: + batch-size: batch_size.1400 orin: group: gpu-name @@ -1142,96 +1026,54 @@ variations: CM_NVIDIA_CUSTOM_GPU: "yes" rtx_4090,resnet50,offline,run_harness: - add_deps_recursive: - build-engine: - tags: _batch_size.64 - env: - CM_MODEL_BATCH_SIZE: "64" - CM_MLPERF_NVIDIA_HARNESS_GPU_BATCH_SIZE: "<<>>" + default_variations: + batch-size: batch_size.64 rtx_4090,resnet50,server,run_harness: - add_deps_recursive: - build-engine: - tags: _batch_size.32 - env: - CM_MODEL_BATCH_SIZE: "32" - CM_MLPERF_NVIDIA_HARNESS_GPU_BATCH_SIZE: "<<>>" + default_variations: + batch-size: batch_size.32 rtx_4090,retinanet,offline,run_harness: - add_deps_recursive: - build-engine: - tags: _batch_size.2 + default_variations: + batch-size: batch_size.2 env: - CM_MODEL_BATCH_SIZE: "2" CM_MLPERF_NVIDIA_HARNESS_GPU_COPY_STREAMS: "2" CM_MLPERF_NVIDIA_HARNESS_GPU_INFERENCE_STREAMS: "2" - CM_MLPERF_NVIDIA_HARNESS_GPU_BATCH_SIZE: "<<>>" rtx_4090,retinanet,server,run_harness: - add_deps_recursive: - build-engine: - tags: _batch_size.2 + default_variations: + batch-size: batch_size.2 env: - CM_MODEL_BATCH_SIZE: "2" CM_MLPERF_NVIDIA_HARNESS_GPU_COPY_STREAMS: "2" CM_MLPERF_NVIDIA_HARNESS_GPU_INFERENCE_STREAMS: "2" - CM_MLPERF_NVIDIA_HARNESS_GPU_BATCH_SIZE: "<<>>" rtx_4090,bert_,offline,run_harness: - add_deps_recursive: - build-engine: - tags: _batch_size.256 - env: - CM_MODEL_BATCH_SIZE: "256" - CM_MLPERF_NVIDIA_HARNESS_GPU_BATCH_SIZE: "<<>>" + default_variations: + batch-size: batch_size.256 rtx_4090,bert_,server,run_harness: - add_deps_recursive: - build-engine: - tags: _batch_size.256 - env: - CM_MODEL_BATCH_SIZE: "256" - CM_MLPERF_NVIDIA_HARNESS_GPU_BATCH_SIZE: "<<>>" + default_variations: + batch-size: batch_size.256 rtx_4090,3d-unet_,offline,run_harness: - add_deps_recursive: - build-engine: - tags: _batch_size.8 - env: - CM_MODEL_BATCH_SIZE: "8" - CM_MLPERF_NVIDIA_HARNESS_GPU_BATCH_SIZE: "<<>>" + default_variations: + batch-size: batch_size.8 rtx_4090,3d-unet_,server,run_harness: - add_deps_recursive: - build-engine: - tags: _batch_size.8 - env: - CM_MODEL_BATCH_SIZE: "8" - CM_MLPERF_NVIDIA_HARNESS_GPU_BATCH_SIZE: "<<>>" + default_variations: + batch-size: batch_size.8 rtx_4090,rnnt,offline,run_harness: - add_deps_recursive: - build-engine: - tags: _batch_size.2048 - env: - CM_MODEL_BATCH_SIZE: "2048" - CM_MLPERF_NVIDIA_HARNESS_GPU_BATCH_SIZE: "<<>>" + default_variations: + batch-size: batch_size.2048 rtx_4090,rnnt,server,run_harness: - add_deps_recursive: - build-engine: - tags: _batch_size.2048 - env: - CM_MODEL_BATCH_SIZE: "2048" - CM_MLPERF_NVIDIA_HARNESS_GPU_BATCH_SIZE: "<<>>" + default_variations: + batch-size: batch_size.2048 rtx_4090,dlrm_,offline,run_harness: - add_deps_recursive: - build-engine: - tags: _batch_size.1400 - env: - CM_MODEL_BATCH_SIZE: "1400" - CM_MLPERF_NVIDIA_HARNESS_GPU_BATCH_SIZE: "<<>>" + default_variations: + batch-size: batch_size.1400 a6000: group: gpu-name @@ -1239,92 +1081,48 @@ variations: CM_NVIDIA_CUSTOM_GPU: "yes" rtx_a6000,resnet50,offline,run_harness: - add_deps_recursive: - build-engine: - tags: _batch_size.64 - env: - CM_MODEL_BATCH_SIZE: "64" - CM_MLPERF_NVIDIA_HARNESS_GPU_BATCH_SIZE: "<<>>" + default_variations: + batch-size: batch_size.64 rtx_a6000,resnet50,server,run_harness: - add_deps_recursive: - build-engine: - tags: _batch_size.32 - env: - CM_MODEL_BATCH_SIZE: "32" - CM_MLPERF_NVIDIA_HARNESS_GPU_BATCH_SIZE: "<<>>" + default_variations: + batch-size: batch_size.32 
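+  # Selecting batch-size via default_variations applies the batch_size.# variation,
+  # which sets CM_MODEL_BATCH_SIZE / CM_MLPERF_NVIDIA_HARNESS_GPU_BATCH_SIZE and
+  # forwards _batch_size.# to the build-engine dependency.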
rtx_a6000,retinanet,offline,run_harness: - add_deps_recursive: - build-engine: - tags: _batch_size.2 - env: - CM_MODEL_BATCH_SIZE: "2" - CM_MLPERF_NVIDIA_HARNESS_GPU_BATCH_SIZE: "<<>>" + default_variations: + batch-size: batch_size.2 rtx_a6000,retinanet,server,run_harness: - add_deps_recursive: - build-engine: - tags: _batch_size.2 - env: - CM_MODEL_BATCH_SIZE: "2" - CM_MLPERF_NVIDIA_HARNESS_GPU_BATCH_SIZE: "<<>>" + default_variations: + batch-size: batch_size.2 rtx_a6000,bert_,offline,run_harness: - add_deps_recursive: - build-engine: - tags: _batch_size.256 - env: - CM_MODEL_BATCH_SIZE: "256" - CM_MLPERF_NVIDIA_HARNESS_GPU_BATCH_SIZE: "<<>>" + default_variations: + batch-size: batch_size.256 rtx_a6000,bert_,server,run_harness: - add_deps_recursive: - build-engine: - tags: _batch_size.256 - env: - CM_MODEL_BATCH_SIZE: "256" - CM_MLPERF_NVIDIA_HARNESS_GPU_BATCH_SIZE: "<<>>" + default_variations: + batch-size: batch_size.256 rtx_a6000,3d-unet_,offline,run_harness: - add_deps_recursive: - build-engine: - tags: _batch_size.8 - env: - CM_MODEL_BATCH_SIZE: "8" - CM_MLPERF_NVIDIA_HARNESS_GPU_BATCH_SIZE: "<<>>" + default_variations: + batch-size: batch_size.8 rtx_a6000,3d-unet_,server,run_harness: - add_deps_recursive: - build-engine: - tags: _batch_size.8 - env: - CM_MODEL_BATCH_SIZE: "8" - CM_MLPERF_NVIDIA_HARNESS_GPU_BATCH_SIZE: "<<>>" + default_variations: + batch-size: batch_size.8 rtx_a6000,rnnt,offline,run_harness: - add_deps_recursive: - build-engine: - tags: _batch_size.2048 - env: - CM_MODEL_BATCH_SIZE: "2048" - CM_MLPERF_NVIDIA_HARNESS_GPU_BATCH_SIZE: "<<>>" + default_variations: + batch-size: batch_size.2048 rtx_a6000,rnnt,server,run_harness: - add_deps_recursive: - build-engine: - tags: _batch_size.512 - env: - CM_MODEL_BATCH_SIZE: "512" - CM_MLPERF_NVIDIA_HARNESS_GPU_BATCH_SIZE: "<<>>" + default_variations: + batch-size: batch_size.512 rtx_a6000,dlrm_,offline,run_harness: - add_deps_recursive: - build-engine: - tags: _batch_size.1400 - env: - CM_MODEL_BATCH_SIZE: "1400" - CM_MLPERF_NVIDIA_HARNESS_GPU_BATCH_SIZE: "<<>>" + default_variations: + batch-size: batch_size.1400 rtx_6000_ada: group: gpu-name @@ -1332,92 +1130,48 @@ variations: CM_NVIDIA_CUSTOM_GPU: "yes" rtx_6000_ada,resnet50,offline,run_harness: - add_deps_recursive: - build-engine: - tags: _batch_size.64 - env: - CM_MODEL_BATCH_SIZE: "64" - CM_MLPERF_NVIDIA_HARNESS_GPU_BATCH_SIZE: "<<>>" + default_variations: + batch-size: batch_size.64 rtx_6000_ada,resnet50,server,run_harness: - add_deps_recursive: - build-engine: - tags: _batch_size.32 - env: - CM_MODEL_BATCH_SIZE: "32" - CM_MLPERF_NVIDIA_HARNESS_GPU_BATCH_SIZE: "<<>>" + default_variations: + batch-size: batch_size.32 rtx_6000_ada,retinanet,offline,run_harness: - add_deps_recursive: - build-engine: - tags: _batch_size.2 - env: - CM_MODEL_BATCH_SIZE: "2" - CM_MLPERF_NVIDIA_HARNESS_GPU_BATCH_SIZE: "<<>>" + default_variations: + batch-size: batch_size.2 rtx_6000_ada,retinanet,server,run_harness: - add_deps_recursive: - build-engine: - tags: _batch_size.2 - env: - CM_MODEL_BATCH_SIZE: "2" - CM_MLPERF_NVIDIA_HARNESS_GPU_BATCH_SIZE: "<<>>" + default_variations: + batch-size: batch_size.2 rtx_6000_ada,bert_,offline,run_harness: - add_deps_recursive: - build-engine: - tags: _batch_size.256 - env: - CM_MODEL_BATCH_SIZE: "256" - CM_MLPERF_NVIDIA_HARNESS_GPU_BATCH_SIZE: "<<>>" + default_variations: + batch-size: batch_size.256 rtx_6000_ada,bert_,server,run_harness: - add_deps_recursive: - build-engine: - tags: _batch_size.256 - env: - CM_MODEL_BATCH_SIZE: "256" - 
CM_MLPERF_NVIDIA_HARNESS_GPU_BATCH_SIZE: "<<>>" + default_variations: + batch-size: batch_size.256 rtx_6000_ada,3d-unet_,offline,run_harness: - add_deps_recursive: - build-engine: - tags: _batch_size.8 - env: - CM_MODEL_BATCH_SIZE: "8" - CM_MLPERF_NVIDIA_HARNESS_GPU_BATCH_SIZE: "<<>>" + default_variations: + batch-size: batch_size.8 rtx_6000_ada,3d-unet_,server,run_harness: - add_deps_recursive: - build-engine: - tags: _batch_size.8 - env: - CM_MODEL_BATCH_SIZE: "8" - CM_MLPERF_NVIDIA_HARNESS_GPU_BATCH_SIZE: "<<>>" + default_variations: + batch-size: batch_size.8 rtx_6000_ada,rnnt,offline,run_harness: - add_deps_recursive: - build-engine: - tags: _batch_size.512 - env: - CM_MODEL_BATCH_SIZE: "512" - CM_MLPERF_NVIDIA_HARNESS_GPU_BATCH_SIZE: "<<>>" + default_variations: + batch-size: batch_size.512 rtx_6000_ada,rnnt,server,run_harness: - add_deps_recursive: - build-engine: - tags: _batch_size.512 - env: - CM_MODEL_BATCH_SIZE: "512" - CM_MLPERF_NVIDIA_HARNESS_GPU_BATCH_SIZE: "<<>>" + default_variations: + batch-size: batch_size.512 rtx_6000_ada,dlrm_,offline,run_harness: - add_deps_recursive: - build-engine: - tags: _batch_size.1400 - env: - CM_MODEL_BATCH_SIZE: "1400" - CM_MLPERF_NVIDIA_HARNESS_GPU_BATCH_SIZE: "<<>>" + default_variations: + batch-size: batch_size.1400 l4: group: gpu-name @@ -1432,47 +1186,32 @@ variations: CM_MLPERF_LOADGEN_MULTISTREAM_TARGET_LATENCY: 1 l4,resnet50,offline,run_harness: - add_deps_recursive: - build-engine: - tags: _batch_size.32 - + default_variations: + batch-size: batch_size.32 env: - CM_MODEL_BATCH_SIZE: "32" CM_MLPERF_NVIDIA_HARNESS_GPU_COPY_STREAMS: "2" CM_MLPERF_NVIDIA_HARNESS_GPU_INFERENCE_STREAMS: "1" CM_MLPERF_NVIDIA_HARNESS_USE_GRAPHS: 'True' - CM_MLPERF_NVIDIA_HARNESS_GPU_BATCH_SIZE: "<<>>" l4,resnet50,server,run_harness: - add_deps_recursive: - build-engine: - tags: _batch_size.16 - + default_variations: + batch-size: batch_size.16 env: - CM_MODEL_BATCH_SIZE: "16" CM_MLPERF_NVIDIA_HARNESS_GPU_COPY_STREAMS: "9" CM_MLPERF_NVIDIA_HARNESS_GPU_INFERENCE_STREAMS: "2" CM_MLPERF_NVIDIA_HARNESS_USE_GRAPHS: 'True' CM_MLPERF_NVIDIA_HARNESS_USE_DEQUE_LIMIT: 'True' CM_MLPERF_NVIDIA_HARNESS_DEQUE_TIMEOUT_USEC: 2000 CM_MLPERF_NVIDIA_HARNESS_USE_CUDA_THREAD_PER_DEVICE: 'True' - CM_MLPERF_NVIDIA_HARNESS_GPU_BATCH_SIZE: "<<>>" l4,retinanet,offline,run_harness: - add_deps_recursive: - build-engine: - tags: _batch_size.2 - env: - CM_MODEL_BATCH_SIZE: "2" - CM_MLPERF_NVIDIA_HARNESS_GPU_BATCH_SIZE: "<<>>" + default_variations: + batch-size: batch_size.2 l4,retinanet,server,run_harness: - add_deps_recursive: - build-engine: - tags: _batch_size.2 + default_variations: + batch-size: batch_size.2 env: - CM_MODEL_BATCH_SIZE: "2" - CM_MLPERF_NVIDIA_HARNESS_GPU_BATCH_SIZE: "<<>>" CM_MLPERF_NVIDIA_HARNESS_GPU_INFERENCE_STREAMS: "2" CM_MLPERF_NVIDIA_HARNESS_GPU_COPY_STREAMS: "2" CM_MLPERF_NVIDIA_HARNESS_USE_DEQUE_LIMIT: 'True' @@ -1480,59 +1219,37 @@ variations: CM_MLPERF_NVIDIA_HARNESS_WORKSPACE_SIZE: 20000000000 l4,bert_,offline,run_harness: - add_deps_recursive: - build-engine: - tags: _batch_size.16 - env: - CM_MODEL_BATCH_SIZE: "16" - CM_MLPERF_NVIDIA_HARNESS_GPU_BATCH_SIZE: "<<>>" + default_variations: + batch-size: batch_size.16 l4,bert_,server,run_harness: - add_deps_recursive: - build-engine: - tags: _batch_size.16 + default_variations: + batch-size: batch_size.16 env: - CM_MODEL_BATCH_SIZE: "16" - CM_MLPERF_NVIDIA_HARNESS_GPU_BATCH_SIZE: "<<>>" CM_MLPERF_NVIDIA_HARNESS_GRAPHS_MAX_SEQLEN: "200" CM_MLPERF_NVIDIA_HARNESS_SERVER_NUM_ISSUE_QUERY_THREADS: "1" 
CM_MLPERF_NVIDIA_HARNESS_SOFT_DROP: "1.0" CM_MLPERF_NVIDIA_HARNESS_USE_SMALL_TILE_GEMM_PLUGIN: "True" l4,3d-unet_,offline,run_harness: - add_deps_recursive: - build-engine: - tags: _batch_size.1 - env: - CM_MODEL_BATCH_SIZE: "1" - CM_MLPERF_NVIDIA_HARNESS_GPU_BATCH_SIZE: "<<>>" + default_variations: + batch-size: batch_size.1 l4,rnnt,offline,run_harness: - add_deps_recursive: - build-engine: - tags: _batch_size.512 - env: - CM_MODEL_BATCH_SIZE: "512" - CM_MLPERF_NVIDIA_HARNESS_GPU_BATCH_SIZE: "<<>>" + default_variations: + batch-size: batch_size.512 l4,rnnt,server,run_harness: - add_deps_recursive: - build-engine: - tags: _batch_size.512 + default_variations: + batch-size: batch_size.512 env: - CM_MODEL_BATCH_SIZE: "512" - CM_MLPERF_NVIDIA_HARNESS_GPU_BATCH_SIZE: "<<>>" CM_MLPERF_NVIDIA_HARNESS_AUDIO_BATCH_SIZE: "64" CM_MLPERF_NVIDIA_HARNESS_AUDIO_BUFFER_NUM_LINES: "1024" CM_MLPERF_NVIDIA_HARNESS_NUM_WARMUPS: "1024" l4,dlrm_,offline,run_harness: - add_deps_recursive: - build-engine: - tags: _batch_size.1400 - env: - CM_MODEL_BATCH_SIZE: "1400" - CM_MLPERF_NVIDIA_HARNESS_GPU_BATCH_SIZE: "<<>>" + default_variations: + batch-size: batch_size.1400 t4: group: gpu-name env: @@ -1546,44 +1263,29 @@ variations: CM_MLPERF_LOADGEN_MULTISTREAM_TARGET_LATENCY: 2 t4,resnet50,offline,run_harness: - add_deps_recursive: - build-engine: - tags: _batch_size.256 - + default_variations: + batch-size: batch_size.256 env: - CM_MODEL_BATCH_SIZE: "256" CM_MLPERF_NVIDIA_HARNESS_GPU_COPY_STREAMS: "4" - CM_MLPERF_NVIDIA_HARNESS_GPU_BATCH_SIZE: "<<>>" t4,resnet50,server,run_harness: - add_deps_recursive: - build-engine: - tags: _batch_size.26 - + default_variations: + batch-size: batch_size.26 env: - CM_MODEL_BATCH_SIZE: "26" CM_MLPERF_NVIDIA_HARNESS_GPU_INFERENCE_STREAMS: "2" CM_MLPERF_NVIDIA_HARNESS_GPU_COPY_STREAMS: "4" CM_MLPERF_NVIDIA_HARNESS_USE_DEQUE_LIMIT: True CM_MLPERF_NVIDIA_HARNESS_DEQUE_TIMEOUT_USEC: 2000 - CM_MLPERF_NVIDIA_HARNESS_GPU_BATCH_SIZE: "<<>>" CM_MLPERF_NVIDIA_HARNESS_SOFT_DROP: "0.993" t4,retinanet,offline,run_harness: - add_deps_recursive: - build-engine: - tags: _batch_size.4 - env: - CM_MODEL_BATCH_SIZE: "4" - CM_MLPERF_NVIDIA_HARNESS_GPU_BATCH_SIZE: "<<>>" + default_variations: + batch-size: batch_size.4 t4,retinanet,server,run_harness: - add_deps_recursive: - build-engine: - tags: _batch_size.2 + default_variations: + batch-size: batch_size.2 env: - CM_MODEL_BATCH_SIZE: "2" - CM_MLPERF_NVIDIA_HARNESS_GPU_BATCH_SIZE: "<<>>" CM_MLPERF_NVIDIA_HARNESS_GPU_INFERENCE_STREAMS: "2" CM_MLPERF_NVIDIA_HARNESS_GPU_COPY_STREAMS: "2" CM_MLPERF_NVIDIA_HARNESS_USE_DEQUE_LIMIT: 'True' @@ -1591,63 +1293,42 @@ variations: CM_MLPERF_NVIDIA_HARNESS_WORKSPACE_SIZE: 20000000000 t4,bert_,offline,run_harness: - add_deps_recursive: - build-engine: - tags: _batch_size.256 - env: - CM_MODEL_BATCH_SIZE: "256" - CM_MLPERF_NVIDIA_HARNESS_GPU_BATCH_SIZE: "<<>>" + default_variations: + batch-size: batch_size.256 t4,bert_,server,run_harness: - add_deps_recursive: - build-engine: - tags: _batch_size.4 + default_variations: + batch-size: batch_size.4 env: - CM_MODEL_BATCH_SIZE: "4" - CM_MLPERF_NVIDIA_HARNESS_GPU_BATCH_SIZE: "<<>>" CM_MLPERF_NVIDIA_HARNESS_GRAPHS_MAX_SEQLEN: "240" CM_MLPERF_NVIDIA_HARNESS_SERVER_NUM_ISSUE_QUERY_THREADS: "0" CM_MLPERF_NVIDIA_HARNESS_USE_SMALL_TILE_GEMM_PLUGIN: "no" t4,3d-unet_,offline,run_harness: - add_deps_recursive: - build-engine: - tags: _batch_size.8 - env: - CM_MODEL_BATCH_SIZE: "8" - CM_MLPERF_NVIDIA_HARNESS_GPU_BATCH_SIZE: "<<>>" + default_variations: + batch-size: batch_size.8 
t4,rnnt,offline,run_harness: - add_deps_recursive: - build-engine: - tags: _batch_size.2048 + default_variations: + batch-size: batch_size.2048 env: - CM_MODEL_BATCH_SIZE: "2048" CM_MLPERF_NVIDIA_HARNESS_GPU_COPY_STREAMS: "4" CM_MLPERF_NVIDIA_HARNESS_USE_GRAPHS: 'True' CM_MLPERF_NVIDIA_HARNESS_AUDIO_BATCH_SIZE: "128" CM_MLPERF_NVIDIA_HARNESS_DISABLE_ENCODER_PLUGIN: "True" - CM_MLPERF_NVIDIA_HARNESS_GPU_BATCH_SIZE: "<<>>" t4,rnnt,server,run_harness: - add_deps_recursive: - build-engine: - tags: _batch_size.2048 + default_variations: + batch-size: batch_size.2048 env: - CM_MODEL_BATCH_SIZE: "2048" CM_MLPERF_NVIDIA_HARNESS_GPU_COPY_STREAMS: "4" CM_MLPERF_NVIDIA_HARNESS_USE_GRAPHS: 'True' CM_MLPERF_NVIDIA_HARNESS_AUDIO_BATCH_SIZE: "128" CM_MLPERF_NVIDIA_HARNESS_DISABLE_ENCODER_PLUGIN: "True" - CM_MLPERF_NVIDIA_HARNESS_GPU_BATCH_SIZE: "<<>>" t4,dlrm_,offline,run_harness: - add_deps_recursive: - build-engine: - tags: _batch_size.1400 - env: - CM_MODEL_BATCH_SIZE: "1400" - CM_MLPERF_NVIDIA_HARNESS_GPU_BATCH_SIZE: "<<>>" + default_variations: + batch-size: batch_size.1400 pcie: group: gpu-connection @@ -1670,54 +1351,31 @@ variations: CM_NVIDIA_CUSTOM_GPU: "yes" a100,sxm,resnet50,offline,run_harness: - add_deps_recursive: - build-engine: - tags: _batch_size.2048 + default_variations: + batch-size: batch_size.2048 env: - CM_MODEL_BATCH_SIZE: "2048" - CM_MLPERF_NVIDIA_HARNESS_GPU_BATCH_SIZE: "<<>>" CM_MLPERF_PERFORMANCE_SAMPLE_COUNT: "2048" a100,sxm,retinanet,offline,run_harness: - add_deps_recursive: - build-engine: - tags: _batch_size.32 + default_variations: + batch-size: batch_size.32 env: - CM_MODEL_BATCH_SIZE: "32" CM_MLPERF_NVIDIA_HARNESS_GPU_COPY_STREAMS: "2" CM_MLPERF_NVIDIA_HARNESS_GPU_INFERENCE_STREAMS: "2" CM_MLPERF_NVIDIA_HARNESS_WORKSPACE_SIZE: "300000000000" - CM_MLPERF_NVIDIA_HARNESS_GPU_BATCH_SIZE: "<<>>" a100,sxm,bert_,offline,run_harness: - add_deps_recursive: - build-engine: - tags: _batch_size.256 - env: - CM_MODEL_BATCH_SIZE: "256" - CM_MLPERF_NVIDIA_HARNESS_GPU_BATCH_SIZE: "<<>>" + default_variations: + batch-size: batch_size.256 a100,sxm,3d-unet_,offline,run_harness: - add_deps_recursive: - build-engine: - tags: _batch_size.8 - env: - CM_MODEL_BATCH_SIZE: "8" - CM_MLPERF_NVIDIA_HARNESS_GPU_BATCH_SIZE: "<<>>" + default_variations: + batch-size: batch_size.8 a100,sxm,rnnt,offline,run_harness: - add_deps_recursive: - build-engine: - tags: _batch_size.2048 - env: - CM_MODEL_BATCH_SIZE: "2048" - CM_MLPERF_NVIDIA_HARNESS_GPU_BATCH_SIZE: "<<>>" + default_variations: + batch-size: batch_size.2048 a100,sxm,dlrm_,offline,run_harness: - add_deps_recursive: - build-engine: - tags: _batch_size.1400 - env: - CM_MODEL_BATCH_SIZE: "1400" - CM_MLPERF_NVIDIA_HARNESS_GPU_BATCH_SIZE: "<<>>" - + default_variations: + batch-size: batch_size.1400 diff --git a/cm-mlops/script/reproduce-mlperf-inference-nvidia/customize.py b/cm-mlops/script/reproduce-mlperf-inference-nvidia/customize.py index 96a6b20e26..ad6f057baf 100644 --- a/cm-mlops/script/reproduce-mlperf-inference-nvidia/customize.py +++ b/cm-mlops/script/reproduce-mlperf-inference-nvidia/customize.py @@ -153,12 +153,13 @@ def preprocess(i): vocab_path = os.path.join(env['MLPERF_SCRATCH_PATH'], 'models', 'bert', 'vocab.txt') if not os.path.exists(os.path.dirname(fp32_model_path)): - cmds.append(f"mkdir -p {os.path.dirname(fp8_model_path)}") - if not os.path.exists(os.path.dirname(fp8_model_path)): cmds.append(f"mkdir -p {os.path.dirname(fp32_model_path)}") + if not os.path.exists(os.path.dirname(fp8_model_path)): + cmds.append(f"mkdir -p 
{os.path.dirname(fp8_model_path)}") if not os.path.exists(fp32_model_path): - cmds.append(f"ln -sf {env['GPTJ_CHECKPOINT_PATH']} {fp32_model_path}") + env['CM_REQUIRE_GPTJ_MODEL_DOWNLOAD'] # download via prehook_deps + cmds.append(f"cp -r $GPTJ_CHECKPOINT_PATH {fp32_model_path}") model_name = "gptj" model_path = fp8_model_path @@ -366,10 +367,26 @@ def preprocess(i): if audio_buffer_num_lines: run_config += f" --audio_buffer_num_lines={audio_buffer_num_lines}" + use_fp8 = env.get('CM_MLPERF_NVIDIA_HARNESS_USE_FP8') + if use_fp8 and use_fp8.lower() not in [ "no", "false" ]: + run_config += f" --use_fp8" + + enable_sort = env.get('CM_MLPERF_NVIDIA_HARNESS_ENABLE_SORT') + if enable_sort and enable_sort.lower() not in [ "no", "false" ]: + run_config += f" --enable_sort" + + num_sort_segments = env.get('CM_MLPERF_NVIDIA_HARNESS_NUM_SORT_SEGMENTS') + if num_sort_segments: + run_config += f" --num_sort_segments={num_sort_segments}" + num_warmups = env.get('CM_MLPERF_NVIDIA_HARNESS_NUM_WARMUPS') if num_warmups: run_config += f" --num_warmups={num_warmups}" + skip_postprocess = env.get('CM_MLPERF_NVIDIA_HARNESS_SKIP_POSTPROCESS') + if skip_postprocess and skip_postprocess.lower() not in [ "no", "false" ]: + run_config += f" --skip_postprocess" + if test_mode: test_mode_string = " --test_mode={}".format(test_mode) else: diff --git a/cm-mlops/script/run-all-mlperf-models/README.md b/cm-mlops/script/run-all-mlperf-models/README.md index 02d42c7fa7..d464d23c95 100644 --- a/cm-mlops/script/run-all-mlperf-models/README.md +++ b/cm-mlops/script/run-all-mlperf-models/README.md @@ -11,6 +11,7 @@ * [ Run this script via GUI](#run-this-script-via-gui) * [ Run this script via Docker (beta)](#run-this-script-via-docker-(beta)) * [Customization](#customization) + * [ Variations](#variations) * [ Default environment](#default-environment) * [Script workflow, dependencies and native scripts](#script-workflow-dependencies-and-native-scripts) * [Script output](#script-output) @@ -47,9 +48,11 @@ ___ #### Run this script from command line -1. `cm run script --tags=run,natively,all,mlperf-models ` +1. `cm run script --tags=run,natively,all,mlperf-models[,variations] ` -2. `cmr "run natively all mlperf-models" ` +2. `cmr "run natively all mlperf-models[ variations]" ` + +* `variations` can be seen [here](#variations) #### Run this script from Python @@ -85,11 +88,106 @@ Use this [online GUI](https://cKnowledge.org/cm-gui/?tags=run,natively,all,mlper #### Run this script via Docker (beta) -`cm docker script "run natively all mlperf-models" ` +`cm docker script "run natively all mlperf-models[ variations]" ` ___ ### Customization + +#### Variations + + * *No group (any variation can be selected)* +
+    <details>
+    <summary>Click here to expand this section.</summary>
+
+    * `_phoenix,reference`
+      - Workflow:
+
+    </details>
+
+
+  * Group "**implementation**"
+    <details>
+    <summary>Click here to expand this section.</summary>
+
+    * `_deepsparse`
+      - Environment variables:
+        - *DIVISION*: `open`
+        - *IMPLEMENTATION*: `deepsparse`
+      - Workflow:
+    * `_intel`
+      - Environment variables:
+        - *IMPLEMENTATION*: `intel`
+      - Workflow:
+    * `_mil`
+      - Environment variables:
+        - *IMPLEMENTATION*: `mil`
+      - Workflow:
+    * `_nvidia`
+      - Environment variables:
+        - *IMPLEMENTATION*: `nvidia`
+      - Workflow:
+    * `_qualcomm`
+      - Environment variables:
+        - *IMPLEMENTATION*: `qualcomm`
+      - Workflow:
+    * `_reference`
+      - Environment variables:
+        - *IMPLEMENTATION*: `reference`
+      - Workflow:
+    * `_tflite-cpp`
+      - Environment variables:
+        - *IMPLEMENTATION*: `tflite_cpp`
+      - Workflow:
+
+    </details>
+
+
+  * Group "**power**"
+    <details>
+    <summary>Click here to expand this section.</summary>
+
+    * **`_performance-only`** (default)
+      - Workflow:
+    * `_power`
+      - Environment variables:
+        - *POWER*: `True`
+      - Workflow:
+
+    </details>
+
+
+  * Group "**sut**"
+    <details>
+    <summary>Click here to expand this section.</summary>
+
+    * `_macbookpro-m1`
+      - Environment variables:
+        - *CATEGORY*: `edge`
+        - *DIVISION*: `closed`
+      - Workflow:
+    * `_orin.32g`
+      - Environment variables:
+        - *CATEGORY*: `edge`
+        - *DIVISION*: `closed`
+      - Workflow:
+    * `_phoenix`
+      - Environment variables:
+        - *CATEGORY*: `edge,datacenter`
+        - *DIVISION*: `closed`
+      - Workflow:
+    * `_sapphire-rapids.24c`
+      - Environment variables:
+        - *CATEGORY*: `edge,datacenter`
+        - *DIVISION*: `closed`
+      - Workflow:
+
+    </details>
+
+
+#### Default variations
+
+`_performance-only`

 #### Default environment
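For orientation, the variation tags documented above are simply appended to the script's tag list. A minimal Python sketch of the equivalent call through the standard `cmind` API; the `_nvidia` tag is an illustrative pick from the "implementation" group listed above, not a required choice:

```python
import cmind

# Hedged sketch: run the "run natively all mlperf-models" recipe with one
# illustrative variation tag from the groups documented above.
# Equivalent CLI form:
#   cm run script --tags=run,natively,all,mlperf-models,_nvidia
r = cmind.access({'action': 'run',
                  'automation': 'script',
                  'tags': 'run,natively,all,mlperf-models,_nvidia',
                  'out': 'con'})

# cmind.access() reports failures via the return dict rather than raising
if r['return'] > 0:
    print(r.get('error', ''))
```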
@@ -128,7 +226,7 @@ ___ ___ ### Script output -`cmr "run natively all mlperf-models" -j` +`cmr "run natively all mlperf-models[,variations]" -j` #### New environment keys (filter) #### New environment keys auto-detected from customize diff --git a/cm-mlops/script/run-docker-container/_cm.json b/cm-mlops/script/run-docker-container/_cm.json index 2b20bdf4eb..60fba4cf6d 100644 --- a/cm-mlops/script/run-docker-container/_cm.json +++ b/cm-mlops/script/run-docker-container/_cm.json @@ -32,6 +32,8 @@ "pass_user_group": "CM_DOCKER_PASS_USER_GROUP", "mounts": "CM_DOCKER_VOLUME_MOUNTS", "port_maps": "CM_DOCKER_PORT_MAPS", + "shm_size": "CM_DOCKER_SHM_SIZE", + "extra_run_args": "CM_DOCKER_EXTRA_RUN_ARGS", "device": "CM_DOCKER_ADD_DEVICE", "cache": "CM_DOCKER_CACHE", "all_gpus": "CM_DOCKER_ADD_ALL_GPUS" diff --git a/cm-mlops/script/run-docker-container/customize.py b/cm-mlops/script/run-docker-container/customize.py index 79d1e52887..31501b893b 100644 --- a/cm-mlops/script/run-docker-container/customize.py +++ b/cm-mlops/script/run-docker-container/customize.py @@ -79,24 +79,30 @@ def postprocess(i): port_map_cmds = [] run_opts = '' - if 'CM_DOCKER_PRE_RUN_COMMANDS' in env: + if env.get('CM_DOCKER_PRE_RUN_COMMANDS', []): for pre_run_cmd in env['CM_DOCKER_PRE_RUN_COMMANDS']: run_cmds.append(pre_run_cmd) - if 'CM_DOCKER_VOLUME_MOUNTS' in env: + if env.get('CM_DOCKER_VOLUME_MOUNTS', []): for mounts in env['CM_DOCKER_VOLUME_MOUNTS']: mount_cmds.append(mounts) - if 'CM_DOCKER_PASS_USER_GROUP' in env: + if env.get('CM_DOCKER_PASS_USER_GROUP', '') != '': run_opts += " --group-add $(id -g $USER) " - if 'CM_DOCKER_ADD_DEVICE' in env: + if env.get('CM_DOCKER_ADD_DEVICE', '') != '': run_opts += " --device="+env['CM_DOCKER_ADD_DEVICE'] - if 'CM_DOCKER_ADD_ALL_GPUS' in env: + if env.get('CM_DOCKER_ADD_ALL_GPUS', '') != '': run_opts += " --gpus=all" - if 'CM_DOCKER_PORT_MAPS' in env: + if env.get('CM_DOCKER_SHM_SIZE', '') != '': + run_opts += " --shm-size={}".format(env['CM_DOCKER_SHM_SIZE']) + + if env.get('CM_DOCKER_EXTRA_RUN_ARGS', '') != '': + run_opts += env['CM_DOCKER_EXTRA_RUN_ARGS'] + + if env.get('CM_DOCKER_PORT_MAPS', []): for ports in env['CM_DOCKER_PORT_MAPS']: port_map_cmds.append(ports) diff --git a/cm-mlops/script/run-mlperf-inference-app/_cm.yaml b/cm-mlops/script/run-mlperf-inference-app/_cm.yaml index 6f12b26cd4..a0de65dd01 100644 --- a/cm-mlops/script/run-mlperf-inference-app/_cm.yaml +++ b/cm-mlops/script/run-mlperf-inference-app/_cm.yaml @@ -5,7 +5,9 @@ automation_alias: script automation_uid: 5b4e0237da074764 category: Modular MLPerf inference benchmark pipeline -gui_title: CM GUI to run the MLPerf inference benchmark and prepare submissions + +gui: + title: CM GUI to run MLPerf inference benchmarks and prepare submissions clean_output_files: - open.tar.gz diff --git a/cm-mlops/script/run-mlperf-training-submission-checker/README.md b/cm-mlops/script/run-mlperf-training-submission-checker/README.md index 3e6da038a8..5e1f968928 100644 --- a/cm-mlops/script/run-mlperf-training-submission-checker/README.md +++ b/cm-mlops/script/run-mlperf-training-submission-checker/README.md @@ -166,7 +166,7 @@ ___ * CM names: `--adr.['inference-src', 'submission-checker-src']...` - CM script: [get-mlperf-inference-src](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/get-mlperf-inference-src) * install,mlperf,logging,from.src - - CM script: [install-mlperf-logging-from.src](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/install-mlperf-logging-from.src) + - CM script: 
[install-mlperf-logging-from-src](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/install-mlperf-logging-from-src) 1. ***Run "preprocess" function from [customize.py](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/run-mlperf-training-submission-checker/customize.py)*** 1. Read "prehook_deps" on other CM scripts from [meta](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/run-mlperf-training-submission-checker/_cm.json) 1. ***Run native script if exists*** diff --git a/cm/CHANGES.md b/cm/CHANGES.md index 234106a716..503f7e28d3 100644 --- a/cm/CHANGES.md +++ b/cm/CHANGES.md @@ -1,8 +1,9 @@ -## V1.6.1 +## V1.6.2 - improved --help for common automations and CM scripts (automation recipes) - fixed a few minor bugs - added support to print directories and files for a given CM entry via "cm info {automation} {artifact|--tags}" + - fixed "cm pull repo" if repo already exists ## V1.6.0 - added support for Python 3.12 (removed "pkg" dependency) diff --git a/cm/cmind/__init__.py b/cm/cmind/__init__.py index 9256608d6c..2a447b6536 100644 --- a/cm/cmind/__init__.py +++ b/cm/cmind/__init__.py @@ -1,4 +1,4 @@ -__version__ = "1.6.1.1" +__version__ = "1.6.2.1" from cmind.core import access from cmind.core import error diff --git a/cm/cmind/cli.py b/cm/cmind/cli.py index 39c861edb6..96a6917319 100644 --- a/cm/cmind/cli.py +++ b/cm/cmind/cli.py @@ -101,6 +101,32 @@ def docker_script(argv = None): return run(['docker', 'script'] + argv) +############################################################ +def gui_script(argv = None): + """ + Shortcut to "cm gui script ..." + + CM command line format: + + Args: + argv (list | string): command line arguments + + Returns: + (CM return dict): + + * return (int): return code == 0 if no error and >0 if error + * (error) (str): error string if return>0 + + * Output from a CM automation action + + """ + + # Access CM + if argv is None: + argv = sys.argv[1:] + + return run(['gui', 'script'] + argv) + ############################################################ def run_experiment(argv = None): """ diff --git a/cm/cmind/repos.py b/cm/cmind/repos.py index 4a001f860b..5b31cc6145 100644 --- a/cm/cmind/repos.py +++ b/cm/cmind/repos.py @@ -286,14 +286,6 @@ def pull(self, alias, url = '', branch = '', checkout = '', console = False, des os.chdir(path_to_repo) cmd = 'git pull' - # Attempt to clone - os.chdir(self.full_path_to_repos) - - cmd = 'git clone '+url+' '+alias - - # Check if depth is set - if depth!=None and depth!='': - cmd+=' --depth '+str(depth) else: # Attempt to clone clone = True diff --git a/cm/cmind/utils.py b/cm/cmind/utils.py index e0771baf57..01fa730d0d 100644 --- a/cm/cmind/utils.py +++ b/cm/cmind/utils.py @@ -1580,7 +1580,8 @@ def call_internal_module(module_self, path_to_current_module, module_name, modul del(sys.path[0]) - i['self_module'] = module_self + if module_self!=None: + i['self_module'] = module_self return getattr(tmp_module, module_func)(i) diff --git a/cm/setup.py b/cm/setup.py index f3a584f7f4..b24225ca38 100644 --- a/cm/setup.py +++ b/cm/setup.py @@ -101,6 +101,7 @@ def run(self): "cm = cmind.cli:run", "cmr = cmind.cli:run_script", "cmrd = cmind.cli:docker_script", + "cmg = cmind.cli:gui_script", "cme = cmind.cli:run_experiment" ]}, diff --git a/cmr.yaml b/cmr.yaml index b94693a602..48ae611d66 100644 --- a/cmr.yaml +++ b/cmr.yaml @@ -2,4 +2,4 @@ alias: mlcommons@ck git: true prefix: cm-mlops uid: a4705959af8e447a -version: 1.6.0 +version: 1.6.2.1 diff --git a/docs/getting-started.md b/docs/getting-started.md 
index 474817329a..493175fc4a 100644 --- a/docs/getting-started.md +++ b/docs/getting-started.md @@ -628,11 +628,11 @@ and from the [Student Cluster Competition at Supercomputing'23](tutorials/scc23- ## How to use CM as a common interface to other projects? While CM was successfully validated to unify, modularize and automate MLPerf benchmarks, -it is applicable to any software project. +it turned out to be applicable to any software project. -The community started using CM automation recipes as a common and human-friendly interface -to run other software projects and manage experiments across diverse models, data sets, software and hardware -while making them more modular, portable and reusable. +The community started using CM as a common and human-friendly interface to run other software projects +and manage experiments across diverse models, data sets, software and hardware while making them more modular, +portable and reusable. Please check [other CM tutorials](tutorials), [CM documentation](README.md) and our [ACM REP'23 keynote](https://www.youtube.com/watch?v=7zpeIVwICa4) for more details. diff --git a/docs/history.md b/docs/history.md index ecba5cd151..4a75cc7ba5 100644 --- a/docs/history.md +++ b/docs/history.md @@ -2,7 +2,7 @@ # Collective Knowledge v1 and v2 (CK) -The [open-source Collective Knowledge Technology v1 and v2 (CK)](https://arxiv.org/abs/2011.01149) +The [open-source Collective Knowledge Technology v1 and v2 (CK)](https://doi.org/10.1098/rsta.2020.0211) was originally developed by [Grigori Fursin](https://cKnowledge.org/gfursin) based on his long experience helping the community [reproduce many research projects and validate them in the real world](https://learning.acm.org/techtalks/reproducibility) diff --git a/docs/installation.md b/docs/installation.md index c9504757cc..4b2c9a60ca 100644 --- a/docs/installation.md +++ b/docs/installation.md @@ -21,7 +21,7 @@ # CM installation -The CM language requires minimal dependencies to run on any platform: `python 3+, pip, git, wget`. +MLCommons Collective Mind framework requires minimal dependencies to run on any platform: `python 3+, pip, git, wget`. However, most CM automation recipes shared by the community and MLCommons require Python 3.7+ . ***By default, CM will pull Git repositories and cache installations and downloaded files in your `$HOME/CM` directory (Linux/MacOS). diff --git a/docs/interface.md b/docs/interface.md index 5a3ac65cb7..4e05ec35e5 100644 --- a/docs/interface.md +++ b/docs/interface.md @@ -295,4 +295,4 @@ You can see the use of CM in these real-world examples: ## Further reading * [CM specification](specs/README.md) -* [Article with the concept of a common automation language based on previous version of CM language before MLCommons](https://arxiv.org/abs/2011.01149) +* [Article with the concept of a common automation language based on previous version of CM language before MLCommons](https://doi.org/10.1098/rsta.2020.0211) diff --git a/docs/introduction-cm.md b/docs/introduction-cm.md index fe5554fdbd..7bd453e6b2 100644 --- a/docs/introduction-cm.md +++ b/docs/introduction-cm.md @@ -68,6 +68,6 @@ across continuously changing software, hardware, models, and data. 
## Presentations * [CK vision (ACM Tech Talk at YouTube)](https://www.youtube.com/watch?v=7zpeIVwICa4) -* [CK concepts (Philosophical Transactions of the Royal Society)](https://arxiv.org/abs/2011.01149) +* [CK concepts (Philosophical Transactions of the Royal Society)](https://doi.org/10.1098/rsta.2020.0211) * [CM workflow automation introduction (slides from ACM REP'23 keynote)](https://doi.org/10.5281/zenodo.8105339) * [MLPerf inference submitter orientation (slides)](https://doi.org/10.5281/zenodo.8144274) diff --git a/docs/list_of_scripts.md b/docs/list_of_scripts.md index 875cc322ac..eb96ccc4c1 100644 --- a/docs/list_of_scripts.md +++ b/docs/list_of_scripts.md @@ -19,12 +19,12 @@ CM scripts can easily chained together into automation workflows using `deps` an while automatically updating all environment variables and paths for a given task and platform [using simple JSON or YAML](https://github.com/mlcommons/ck/blob/master/cm-mlops/script/app-image-classification-onnx-py/_cm.yaml). + *Note that CM is a community project being developed and extended by [MLCommons members and individual contributors](../CONTRIBUTING.md) - you can find source code of CM scripts maintained by MLCommons [here](../cm-mlops/script). Please join [Discord server](https://discord.gg/JjWNWXKxwT) to participate in collaborative developments or provide your feedback.* - # License [Apache 2.0](LICENSE.md) @@ -105,7 +105,7 @@ for a given task and platform [using simple JSON or YAML](https://github.com/mlc * [get-qaic-software-kit](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/get-qaic-software-kit) * [get-rocm](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/get-rocm) * [get-tvm](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/get-tvm) -* [install-qaic-compute-sdk-from.src](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/install-qaic-compute-sdk-from.src) +* [install-qaic-compute-sdk-from-src](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/install-qaic-compute-sdk-from-src) * [install-rocm](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/install-rocm) * [install-tensorflow-for-c](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/install-tensorflow-for-c) * [install-tensorflow-from-src](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/install-tensorflow-from-src) @@ -179,13 +179,13 @@ for a given task and platform [using simple JSON or YAML](https://github.com/mlc * [get-go](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/get-go) * [get-llvm](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/get-llvm) *(Detect or install LLVM compiler)* * [install-gcc-src](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/install-gcc-src) -* [install-ipex-from.src](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/install-ipex-from.src) *(Build IPEX from sources)* +* [install-ipex-from-src](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/install-ipex-from-src) *(Build IPEX from sources)* * [install-llvm-prebuilt](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/install-llvm-prebuilt) *(Install prebuilt LLVM compiler)* * [install-llvm-src](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/install-llvm-src) *(Build LLVM compiler from sources (can take >30 min))* -* [install-onednn-from.src](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/install-onednn-from.src) *(Build oneDNN from sources)* -* 
[install-onnxruntime-from.src](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/install-onnxruntime-from.src) *(Build onnxruntime from sources)* -* [install-pytorch-from.src](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/install-pytorch-from.src) *(Build pytorch from sources)* -* [install-transformers-from.src](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/install-transformers-from.src) *(Build transformers from sources)* +* [install-onednn-from-src](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/install-onednn-from-src) *(Build oneDNN from sources)* +* [install-onnxruntime-from-src](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/install-onnxruntime-from-src) *(Build onnxruntime from sources)* +* [install-pytorch-from-src](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/install-pytorch-from-src) *(Build pytorch from sources)* +* [install-transformers-from-src](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/install-transformers-from-src) *(Build transformers from sources)* ### Dashboard automation @@ -277,11 +277,14 @@ for a given task and platform [using simple JSON or YAML](https://github.com/mlc * [generate-mlperf-tiny-report](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/generate-mlperf-tiny-report) * [generate-mlperf-tiny-submission](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/generate-mlperf-tiny-submission) * [generate-nvidia-engine](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/generate-nvidia-engine) +* [get-mlperf-inference-intel-scratch-space](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/get-mlperf-inference-intel-scratch-space) * [get-mlperf-inference-loadgen](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/get-mlperf-inference-loadgen) * [get-mlperf-inference-nvidia-common-code](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/get-mlperf-inference-nvidia-common-code) * [get-mlperf-inference-nvidia-scratch-space](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/get-mlperf-inference-nvidia-scratch-space) * [get-mlperf-inference-results](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/get-mlperf-inference-results) +* [get-mlperf-inference-results-dir](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/get-mlperf-inference-results-dir) * [get-mlperf-inference-src](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/get-mlperf-inference-src) +* [get-mlperf-inference-submission-dir](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/get-mlperf-inference-submission-dir) * [get-mlperf-inference-sut-configs](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/get-mlperf-inference-sut-configs) * [get-mlperf-inference-sut-description](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/get-mlperf-inference-sut-description) * [get-mlperf-logging](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/get-mlperf-logging) @@ -294,7 +297,7 @@ for a given task and platform [using simple JSON or YAML](https://github.com/mlc * [import-mlperf-inference-to-experiment](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/import-mlperf-inference-to-experiment) * [import-mlperf-tiny-to-experiment](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/import-mlperf-tiny-to-experiment) * [import-mlperf-training-to-experiment](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/import-mlperf-training-to-experiment) -* 
[install-mlperf-logging-from.src](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/install-mlperf-logging-from.src) +* [install-mlperf-logging-from-src](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/install-mlperf-logging-from-src) * [prepare-training-data-bert](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/prepare-training-data-bert) * [prepare-training-data-resnet](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/prepare-training-data-resnet) * [preprocess-mlperf-inference-submission](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/preprocess-mlperf-inference-submission) @@ -426,6 +429,7 @@ for a given task and platform [using simple JSON or YAML](https://github.com/mlc * [create-conda-env](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/create-conda-env) * [create-fpgaconvnet-app-tinyml](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/create-fpgaconvnet-app-tinyml) * [create-fpgaconvnet-config-tinyml](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/create-fpgaconvnet-config-tinyml) +* [create-patch](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/create-patch) * [destroy-terraform](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/destroy-terraform) * [detect-cpu](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/detect-cpu) * [detect-os](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/detect-os) @@ -434,6 +438,7 @@ for a given task and platform [using simple JSON or YAML](https://github.com/mlc * [download-file](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/download-file) * [download-torrent](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/download-torrent) * [extract-file](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/extract-file) +* [fail](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/fail) * [flash-tinyml-binary](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/flash-tinyml-binary) * [generate-mlperf-inference-submission](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/generate-mlperf-inference-submission) * [generate-mlperf-inference-user-conf](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/generate-mlperf-inference-user-conf) @@ -514,11 +519,14 @@ for a given task and platform [using simple JSON or YAML](https://github.com/mlc * [get-ml-model-tiny-resnet](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/get-ml-model-tiny-resnet) * [get-ml-model-using-imagenet-from-model-zoo](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/get-ml-model-using-imagenet-from-model-zoo) * [get-mlcommons-croissant](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/get-mlcommons-croissant) +* [get-mlperf-inference-intel-scratch-space](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/get-mlperf-inference-intel-scratch-space) * [get-mlperf-inference-loadgen](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/get-mlperf-inference-loadgen) * [get-mlperf-inference-nvidia-common-code](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/get-mlperf-inference-nvidia-common-code) * [get-mlperf-inference-nvidia-scratch-space](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/get-mlperf-inference-nvidia-scratch-space) * [get-mlperf-inference-results](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/get-mlperf-inference-results) +* 
[get-mlperf-inference-results-dir](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/get-mlperf-inference-results-dir) * [get-mlperf-inference-src](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/get-mlperf-inference-src) +* [get-mlperf-inference-submission-dir](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/get-mlperf-inference-submission-dir) * [get-mlperf-inference-sut-configs](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/get-mlperf-inference-sut-configs) * [get-mlperf-inference-sut-description](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/get-mlperf-inference-sut-description) * [get-mlperf-logging](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/get-mlperf-logging) @@ -569,23 +577,24 @@ for a given task and platform [using simple JSON or YAML](https://github.com/mlc * [install-generic-conda-package](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/install-generic-conda-package) * [install-gflags](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/install-gflags) * [install-github-cli](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/install-github-cli) -* [install-ipex-from.src](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/install-ipex-from.src) *(Build IPEX from sources)* +* [install-ipex-from-src](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/install-ipex-from-src) *(Build IPEX from sources)* * [install-llvm-prebuilt](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/install-llvm-prebuilt) *(Install prebuilt LLVM compiler)* * [install-llvm-src](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/install-llvm-src) *(Build LLVM compiler from sources (can take >30 min))* -* [install-mlperf-logging-from.src](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/install-mlperf-logging-from.src) -* [install-onednn-from.src](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/install-onednn-from.src) *(Build oneDNN from sources)* -* [install-onnxruntime-from.src](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/install-onnxruntime-from.src) *(Build onnxruntime from sources)* +* [install-mlperf-logging-from-src](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/install-mlperf-logging-from-src) +* [install-nccl-libs](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/install-nccl-libs) +* [install-onednn-from-src](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/install-onednn-from-src) *(Build oneDNN from sources)* +* [install-onnxruntime-from-src](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/install-onnxruntime-from-src) *(Build onnxruntime from sources)* * [install-openssl](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/install-openssl) * [install-python-src](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/install-python-src) * [install-python-venv](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/install-python-venv) -* [install-pytorch-from.src](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/install-pytorch-from.src) *(Build pytorch from sources)* -* [install-qaic-compute-sdk-from.src](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/install-qaic-compute-sdk-from.src) +* [install-pytorch-from-src](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/install-pytorch-from-src) *(Build pytorch from sources)* +* 
[install-qaic-compute-sdk-from-src](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/install-qaic-compute-sdk-from-src) * [install-rocm](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/install-rocm) * [install-tensorflow-for-c](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/install-tensorflow-for-c) * [install-tensorflow-from-src](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/install-tensorflow-from-src) * [install-terraform-from-src](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/install-terraform-from-src) * [install-tflite-from-src](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/install-tflite-from-src) -* [install-transformers-from.src](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/install-transformers-from.src) *(Build transformers from sources)* +* [install-transformers-from-src](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/install-transformers-from-src) *(Build transformers from sources)* * [prepare-training-data-bert](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/prepare-training-data-bert) * [prepare-training-data-resnet](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/prepare-training-data-resnet) * [preprocess-mlperf-inference-submission](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/preprocess-mlperf-inference-submission) @@ -634,7 +643,3 @@ for a given task and platform [using simple JSON or YAML](https://github.com/mlc - -# Community developments - -* [Discord server](https://discord.gg/JjWNWXKxwT) diff --git a/docs/misc/history.md b/docs/misc/history.md index ff37b62c18..3997dff636 100644 --- a/docs/misc/history.md +++ b/docs/misc/history.md @@ -65,5 +65,5 @@ optimization and deployment across continuously changing software, hardware and We would like to thank [MLCommons](https://mlcommons.org), [OctoML](https://octoml.ai), all [contributors](https://github.com/mlcommons/ck/blob/master/CONTRIBUTING.md) and [collaborators](https://cKnowledge.org/partners.html) for their support, fruitful discussions, -and useful feedback! See more acknowledgments in the [CK journal article](https://arxiv.org/abs/2011.01149) +and useful feedback! See more acknowledgments in the [CK journal article](https://doi.org/10.1098/rsta.2020.0211) and our [ACM TechTalk](https://www.youtube.com/watch?v=7zpeIVwICa4). diff --git a/docs/misc/overview.md b/docs/misc/overview.md index e33b1c2c22..cba729caa6 100644 --- a/docs/misc/overview.md +++ b/docs/misc/overview.md @@ -5,7 +5,7 @@ The Collective Knowledge project is motivated by our [tedious experience](https://learning.acm.org/techtalks/reproducibility) reproducing research papers on machine learning and systems and validating them in the real world. -We have developed the [Collective Knowledge concept (CK)](https://arxiv.org/pdf/2011.01149.pdf) +We have developed the [Collective Knowledge concept (CK)](https://doi.org/10.1098/rsta.2020.0211) to provide a simple way to unify, manage, connect and reuse any artifacts, scripts, tools and workflows on any platform with any software and hardware stack. 
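As a small illustration of the unified interface described above, every CM artifact and automation recipe, including the renamed `install-*-from-src` scripts listed earlier, is reachable through the same `cmind.access` entry point. A hedged sketch, assuming a local CM installation with the `mlcommons@ck` repository pulled; the chosen alias is just one example from the list:

```python
import cmind

# Illustrative sketch: locate one of the renamed build-from-source recipes
# listed above by its alias, then print where its CM entry lives on disk.
r = cmind.access({'action': 'find',
                  'automation': 'script',
                  'artifact': 'install-pytorch-from-src'})

if r['return'] > 0:
    print(r['error'])
else:
    for entry in r['list']:
        print(entry.path)  # directory with the script's _cm.json/_cm.yaml and customize.py
```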
diff --git a/docs/mlperf/inference/bert/tutorial.md b/docs/mlperf/inference/bert/tutorial.md index e2e7d7301f..8f718cf2f2 100644 --- a/docs/mlperf/inference/bert/tutorial.md +++ b/docs/mlperf/inference/bert/tutorial.md @@ -35,7 +35,7 @@ The MLCommons is developing an open-source and technology-neutral to modularize ML Systems and automate their benchmarking, optimization and design space exploration across continuously changing software, hardware and data. -CM is the second generation of the [MLCommons CK workflow automation framework](https://arxiv.org/pdf/2011.01149.pdf) +CM is the second generation of the [MLCommons CK workflow automation framework](https://doi.org/10.1098/rsta.2020.0211) that was originally developed to make it easier to [reproduce research papers at ML and Systems conferences](https://learning.acm.org/techtalks/reproducibility). The goal is to help researchers unify and automate all the steps to prepare and run MLPerf and other benchmarks across diverse ML models, datasets, frameworks, compilers and hardware (see [HPCA'22 presentation](https://doi.org/10.5281/zenodo.6475385) about our motivation). diff --git a/docs/tutorials/sc22-scc-mlperf.md b/docs/tutorials/sc22-scc-mlperf.md index e0eb4f0320..89b12accad 100644 --- a/docs/tutorials/sc22-scc-mlperf.md +++ b/docs/tutorials/sc22-scc-mlperf.md @@ -88,7 +88,7 @@ The MLCommons is developing an open-source and technology-neutral to modularize ML Systems and automate their benchmarking, optimization and design space exploration across continuously changing software, hardware and data. -CM is the second generation of the [MLCommons CK workflow automation framework](https://arxiv.org/pdf/2011.01149.pdf) +CM is the second generation of the [MLCommons CK workflow automation framework](https://doi.org/10.1098/rsta.2020.0211) that was originally developed to make it easier to [reproduce research papers at ML and Systems conferences](https://learning.acm.org/techtalks/reproducibility). The goal is to help researchers unify and automate all the steps to prepare and run MLPerf and other benchmarks across diverse ML models, datasets, frameworks, compilers and hardware (see [HPCA'22 presentation](https://doi.org/10.5281/zenodo.6475385) about our motivation). diff --git a/platform/README.md b/platform/README.md index e75de5c89a..59cfc9ec3f 100644 --- a/platform/README.md +++ b/platform/README.md @@ -43,7 +43,7 @@ led by [Grigori Fursin](https://cKnowledge.org/gfursin) and * Join our [public conf-calls](https://docs.google.com/document/d/1zMNK1m_LhWm6jimZK6YE05hu4VH9usdbKJ3nBy-ZPAw). * Check our [news](docs/news.md). * Check our [presentation](https://doi.org/10.5281/zenodo.7871070) with development plans. -* Read about our [CK concept (previous version before MLCommons)](https://arxiv.org/abs/2011.01149). +* Read about our [CK concept (previous version before MLCommons)](https://doi.org/10.1098/rsta.2020.0211). #### Source code for on-prem use