From 556de815c506927d32dbb58bde44d4c42ef84f7b Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Tue, 30 Jan 2024 13:46:36 +0000 Subject: [PATCH 1/5] Support cuda 12.1.1 --- cm-mlops/script/install-cuda-prebuilt/_cm.json | 5 +++++ cm-mlops/script/install-cuda-prebuilt/customize.py | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/cm-mlops/script/install-cuda-prebuilt/_cm.json b/cm-mlops/script/install-cuda-prebuilt/_cm.json index 7abbdf63b4..f8e729ad66 100644 --- a/cm-mlops/script/install-cuda-prebuilt/_cm.json +++ b/cm-mlops/script/install-cuda-prebuilt/_cm.json @@ -69,6 +69,11 @@ "CM_CUDA_LINUX_FILENAME": "cuda_12.0.0_525.60.13_linux.run" } }, + "12.1.1": { + "env": { + "CM_CUDA_LINUX_FILENAME": "cuda_12.1.1_530.30.02_linux.run" + } + }, "12.2.0": { "env": { "CM_CUDA_LINUX_FILENAME": "cuda_12.2.0_535.54.03_linux.run" diff --git a/cm-mlops/script/install-cuda-prebuilt/customize.py b/cm-mlops/script/install-cuda-prebuilt/customize.py index 74fa67fbb8..a9dafa2e9a 100644 --- a/cm-mlops/script/install-cuda-prebuilt/customize.py +++ b/cm-mlops/script/install-cuda-prebuilt/customize.py @@ -10,7 +10,7 @@ def preprocess(i): automation = i['automation'] version = env.get('CM_VERSION') if version not in env.get('CM_CUDA_LINUX_FILENAME', ''): - return {'return': 1, 'error': "Only CUDA versions 11.7.0, 11.8.0, 12.0.0 and 12.2.0 are supported now!"} + return {'return': 1, 'error': "Only CUDA versions 11.7.0, 11.8.0, 12.0.0, 12.1.1 and 12.2.0 are supported now!"} recursion_spaces = i['recursion_spaces'] nvcc_bin = "nvcc" From e085fac4cd6e73fbf50da1a05b3fc368a2101781 Mon Sep 17 00:00:00 2001 From: Grigori Fursin Date: Tue, 30 Jan 2024 15:27:35 +0100 Subject: [PATCH 2/5] docs clean up --- cm-mlops/script/README.md | 45 +-------------------------------------- docs/taskforce.md | 6 +++--- 2 files changed, 4 insertions(+), 47 deletions(-) diff --git a/cm-mlops/script/README.md b/cm-mlops/script/README.md index 21efec3440..a9e5e41450 100644 --- 
a/cm-mlops/script/README.md +++ b/cm-mlops/script/README.md @@ -2,7 +2,7 @@ This is a source code of portable and reusable automation recipes from MLCommons projects with a [human-friendly CM interface](https://github.com/mlcommons/ck) - -you can find more details [here](../../docs/list_of_scripts.md). +you can find a human-readable catalog of these automation recipes [here](../../docs/list_of_scripts.md). ### License @@ -11,46 +11,3 @@ you can find more details [here](../../docs/list_of_scripts.md). ### Copyright 2022-2024 [MLCommons](https://mlcommons.org) - -### Examples - -```bash -pip install cmind - -cm pull repo mlcommons@ck - -cm run script "python app image-classification onnx" - -cm run script "download file _wget" --url=https://cKnowledge.org/ai/data/computer_mouse.jpg --verify=no --env.CM_DOWNLOAD_CHECKSUM=45ae5c940233892c2f860efdf0b66e7e - -cm run script "python app image-classification onnx" --input=computer_mouse.jpg - -cm docker script "python app image-classification onnx" --input=computer_mouse.jpg -cm docker script "python app image-classification onnx" --input=computer_mouse.jpg -j -docker_it - -cm run script "get generic-python-lib _package.onnxruntime" -cm run script "get coco dataset _val _2014" -cm run script "get ml-model stable-diffusion" -cm run script "get ml-model huggingface zoo _model-stub.alpindale/Llama-2-13b-ONNX" --model_filename=FP32/LlamaV2_13B_float32.onnx --skip_cache - -cm show cache -cm show cache "get ml-model stable-diffusion" - -cm run script "run common mlperf inference" --implementation=nvidia --model=bert-99 --category=datacenter --division=closed -cm find script "run common mlperf inference" - -cm pull repo ctuning@cm-reproduce-research-projects -cmr "reproduce paper micro-2023 victima _install_deps" -cmr "reproduce paper micro-2023 victima _run" - -... 
- -``` - -```python -import cmind -output=cmind.access({'action':'run', 'automation':'script', - 'tags':'python,app,image-classification,onnx', - 'input':'computer_mouse.jpg'}) -if output['return']==0: print (output) -``` diff --git a/docs/taskforce.md b/docs/taskforce.md index a6c49d043a..8a08e54014 100644 --- a/docs/taskforce.md +++ b/docs/taskforce.md @@ -4,11 +4,11 @@ ## Mission -* Develop [reusable automation recipes and workflows for MLOps and DevOps](https://github.com/mlcommons/ck/tree/master/cm-mlops/script) +* Develop [reusable automation recipes and workflows](https://github.com/mlcommons/ck/tree/master/cm-mlops/script) with [a common and human-friendly interface (Collective Mind aka CM)](https://github.com/mlcommons/ck) - to help everyone run, reproduce, customize and optimize different implementations of MLPerf benchmarks + to support MLCommons projects and help everyone assemble, run, reproduce, customize and optimize ML(Perf) benchmarks in a unified and automated way across diverse models, data sets, software and hardware from different vendors. -* Gradually add the MLCommons CM interface to [all MLPerf inference submissions](https://github.com/mlcommons/ck/issues/1052) starting from v3.1. +* Gradually extend a unified MLCommons CM interface to automate [all MLPerf inference submissions](https://github.com/mlcommons/ck/issues/1052) starting from v3.1. * Continuously encode MLPerf rules and best practices in the [CM automation recipes and workflows for MLPerf](https://github.com/mlcommons/ck/tree/master/cm-mlops/script) to reduce the burden for submitters to go through numerous README files and track all the latest changes and updates. 
From 19fb27b90069198094384ae6e0d40166a50fe304 Mon Sep 17 00:00:00 2001 From: Grigori Fursin Date: Tue, 30 Jan 2024 15:44:55 +0100 Subject: [PATCH 3/5] clean up --- README.md | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index f3a728ec28..17b584f6b2 100755 --- a/README.md +++ b/README.md @@ -17,16 +17,18 @@ ### About -Collective Mind (CM) is a human-friendly interface to help everyone run, manage and reuse -a [growing number of MLPerf, MLOps and DevOps scripts](https://github.com/mlcommons/ck/tree/master/docs/list_of_scripts.md) -from [MLCommons projects](https://mlcommons.org) and [research papers](https://cTuning.org/ae) -in a unified way on any operating system with any software and hardware -either natively or inside containers. +Collective Mind (CM) is a [collection of portable, reusable and extensible +automation recipes](https://github.com/mlcommons/ck/tree/master/docs/list_of_scripts.md) +from [MLCommons projects](https://mlcommons.org) and [research papers](https://www.youtube.com/watch?v=7zpeIVwICa4) +with a human-friendly interface to help everyone assemble, run, benchmark +and optimize complex AI/ML applications and systems +in a unified way on any operating system with any software and hardware. 
Here are a few most commonly used examples from the CM users that should run in the same way on Linux, MacOS, Windows and other platforms -(see [Getting Started Guide](docs/getting-started.md) to understand -how they work and how to reuse them in your projects): +with any hardware (see the [Getting Started Guide](docs/getting-started.md) +to understand how they work, how to reuse and extend them for your projects +and how to share your own): ```bash pip install cmind @@ -100,11 +102,9 @@ output=cmind.access({'action':'run', 'automation':'script', if output['return']==0: print (output) ``` - Collective Mind is a community project being developed by the [MLCommons Task Force on Automation and Reproducibility](https://github.com/mlcommons/ck/blob/master/docs/taskforce.md) -with great help from [MLCommons (70+ AI organizations)](https://mlcommons.org/, [research community]( https://www.youtube.com/watch?v=7zpeIVwICa4 ) +with great help from [MLCommons (70+ AI organizations)](https://mlcommons.org), and [individual contributors](https://github.com/mlcommons/ck/blob/master/CONTRIBUTING.md) - we want to have a simple, non-intrusive, technology-agnostic, portable and easily-extensible interface to automate all our manual and repetitive tasks including From 663a0583c15be5c90880355bbfd4d8b59955b552 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Tue, 30 Jan 2024 14:45:52 +0000 Subject: [PATCH 4/5] Use ctuning fork for intel mlperf inference --- cm-mlops/script/reproduce-mlperf-inference-intel/_cm.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cm-mlops/script/reproduce-mlperf-inference-intel/_cm.yaml b/cm-mlops/script/reproduce-mlperf-inference-intel/_cm.yaml index 5011cec210..82aa963a7e 100644 --- a/cm-mlops/script/reproduce-mlperf-inference-intel/_cm.yaml +++ b/cm-mlops/script/reproduce-mlperf-inference-intel/_cm.yaml @@ -137,7 +137,7 @@ deps: ######################################################################## # Install MLPerf inference 
dependencies - - tags: get,mlperf,inference,results + - tags: get,mlperf,inference,results,_ctuning names: inference-results version: v3.1 From 552a141478a7cc2921a2d1eb2ee6443ffa77b659 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Tue, 30 Jan 2024 20:22:12 +0530 Subject: [PATCH 5/5] Support gptj in nvidia-harness --- .../reproduce-mlperf-inference-nvidia/_cm.yaml | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/cm-mlops/script/reproduce-mlperf-inference-nvidia/_cm.yaml b/cm-mlops/script/reproduce-mlperf-inference-nvidia/_cm.yaml index 6a3f615ae9..9685465cf5 100644 --- a/cm-mlops/script/reproduce-mlperf-inference-nvidia/_cm.yaml +++ b/cm-mlops/script/reproduce-mlperf-inference-nvidia/_cm.yaml @@ -406,6 +406,24 @@ variations: env: CM_MODEL: dlrm-v2-99.9 + gptj_: + deps: + - tags: get,generic-python-lib,_torch + + gptj-99: + group: model + base: + - gptj_ + env: + CM_MODEL: gptj-99 + + gptj-99.9: + group: model + base: + - gptj_ + env: + CM_MODEL: gptj-99.9 + batch_size.#: group: batch-size env: