Merge from CTuning (#1132)
gfursin authored Feb 26, 2024
2 parents 1902a28 + 2784d26 commit ce707e2
Showing 21 changed files with 151 additions and 123 deletions.
5 changes: 5 additions & 0 deletions cm-mlops/script/app-mlperf-inference-reference/_cm.yaml
@@ -905,6 +905,10 @@ variations:
base:
- llama2-70b_

llama2-70b_,cuda:
default_env:
CM_MLPERF_LOADGEN_BATCH_SIZE: 8

llama2-70b-99.9:
group: models
env:
@@ -1113,6 +1117,7 @@ variations:
alias: int8

batch_size.#:
group: batch-size
env:
CM_MLPERF_LOADGEN_MAX_BATCHSIZE: "#"
add_deps_recursive:
8 changes: 6 additions & 2 deletions cm-mlops/script/app-mlperf-inference-reference/customize.py
@@ -54,8 +54,11 @@ def preprocess(i):
else:
env['CM_NUM_THREADS'] = env.get('CM_HOST_CPU_TOTAL_CORES', '1')

if env.get('CM_MLPERF_LOADGEN_MAX_BATCHSIZE','') != '' and not env.get('CM_MLPERF_MODEL_SKIP_BATCHING', False) :
env['CM_MLPERF_LOADGEN_EXTRA_OPTIONS'] += " --max-batchsize " + env['CM_MLPERF_LOADGEN_MAX_BATCHSIZE']
if env.get('CM_MLPERF_LOADGEN_MAX_BATCHSIZE','') != '' and not env.get('CM_MLPERF_MODEL_SKIP_BATCHING', False):
env['CM_MLPERF_LOADGEN_EXTRA_OPTIONS'] += " --max-batchsize " + str(env['CM_MLPERF_LOADGEN_MAX_BATCHSIZE'])

if env.get('CM_MLPERF_LOADGEN_BATCH_SIZE','') != '':
env['CM_MLPERF_LOADGEN_EXTRA_OPTIONS'] += " --batch-size " + str(env['CM_MLPERF_LOADGEN_BATCH_SIZE'])

if env.get('CM_MLPERF_LOADGEN_QUERY_COUNT','') != '' and not env.get('CM_TMP_IGNORE_MLPERF_QUERY_COUNT', False) and (env['CM_MLPERF_LOADGEN_MODE'] == 'accuracy' or 'gptj' in env['CM_MODEL']) and env.get('CM_MLPERF_RUN_STYLE','') != "valid":
env['CM_MLPERF_LOADGEN_EXTRA_OPTIONS'] += " --count " + env['CM_MLPERF_LOADGEN_QUERY_COUNT']
@@ -297,6 +300,7 @@ def get_run_cmd_reference(os_info, env, scenario_extra_options, mode_extra_optio
" --output-log-dir " + env['CM_MLPERF_OUTPUT_DIR'] + \
' --dtype ' + env['CM_MLPERF_MODEL_PRECISION'] + \
" --model-path " + env['MODEL_DIR']
cmd = cmd.replace("--count", "--total-sample-count")
elif "3d-unet" in env['CM_MODEL']:

env['RUN_DIR'] = env['CM_MLPERF_INFERENCE_3DUNET_PATH']
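Below is a minimal standalone sketch (not part of this commit) of how the two batch-size variables now compose into the loadgen options; the `build_loadgen_options` helper and the example values are hypothetical.

```
# Hypothetical helper illustrating the flag composition shown in the diff above.
def build_loadgen_options(env):
    opts = env.get('CM_MLPERF_LOADGEN_EXTRA_OPTIONS', '')

    # --max-batchsize comes from CM_MLPERF_LOADGEN_MAX_BATCHSIZE unless batching is skipped
    if env.get('CM_MLPERF_LOADGEN_MAX_BATCHSIZE', '') != '' and not env.get('CM_MLPERF_MODEL_SKIP_BATCHING', False):
        opts += " --max-batchsize " + str(env['CM_MLPERF_LOADGEN_MAX_BATCHSIZE'])

    # --batch-size comes from the new CM_MLPERF_LOADGEN_BATCH_SIZE default
    # (set to 8 by the llama2-70b_,cuda variation above)
    if env.get('CM_MLPERF_LOADGEN_BATCH_SIZE', '') != '':
        opts += " --batch-size " + str(env['CM_MLPERF_LOADGEN_BATCH_SIZE'])

    return opts

print(build_loadgen_options({'CM_MLPERF_LOADGEN_BATCH_SIZE': 8}))  # " --batch-size 8"
```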
3 changes: 1 addition & 2 deletions cm-mlops/script/app-mlperf-inference/_cm.yaml
@@ -459,11 +459,10 @@ variations:
skip_if_env:
CM_MLPERF_IMPLEMENTATION:
- nvidia-original
- reference
names:
- mlperf-accuracy-script
- open-orca-accuracy-script
tags: run,accuracy,mlperf,_open-orca
tags: run,accuracy,mlperf,_open-orca,_int32

llama2-70b-99:
group:
2 changes: 1 addition & 1 deletion cm-mlops/script/app-mlperf-inference/customize.py
@@ -224,7 +224,7 @@ def postprocess(i):
state['cm-mlperf-inference-results'][state['CM_SUT_CONFIG_NAME']][model][scenario] = {}

state['cm-mlperf-inference-results'][state['CM_SUT_CONFIG_NAME']][model][scenario][mode] = result
state['cm-mlperf-inference-results'][state['CM_SUT_CONFIG_NAME']][model][scenario][mode+'_valid'] = valid[mode]
state['cm-mlperf-inference-results'][state['CM_SUT_CONFIG_NAME']][model][scenario][mode+'_valid'] = valid.get(mode, False)

state['cm-mlperf-inference-results-last'][mode] = result
state['cm-mlperf-inference-results-last'][mode+'_valid'] = valid[mode]
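A tiny illustration of why the switch from `valid[mode]` to `valid.get(mode, False)` matters (assumed dictionary contents, not from this commit):

```
valid = {'performance': True}   # assume 'accuracy' was never recorded for this run

mode = 'accuracy'
# valid[mode]                   # would raise KeyError
print(valid.get(mode, False))   # prints False, so postprocess can continue
```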
8 changes: 3 additions & 5 deletions cm-mlops/script/build-mlperf-inference-server-nvidia/_cm.yaml
@@ -140,9 +140,6 @@ post_deps:
- "no"
- False
- "False"
enable_if_env:
CM_RUN_STATE_DOCKER:
- False

variations:
# Target devices
@@ -236,9 +233,10 @@ docker:
- tags: get,mlperf,inference,nvidia,scratch,space
- tags: get,mlperf,inference,results,dir
- tags: get,mlperf,inference,submission,dir
- tags: get,nvidia-docker
pre_run_cmds:
- cm pull repo ctuning@mlcommons-ck
run_cmd_prefix: sudo apt remove -y cmake && cm pull repo ctuning@mlcommons-ck
- cm pull repo mlcommons@ck
run_cmd_prefix: sudo apt remove -y cmake && cm pull repo mlcommons@ck
mounts:
- "${{ IMAGENET_PATH }}:/data/imagenet-val"
- "${{ CM_MLPERF_INFERENCE_RESULTS_DIR }}:${{ CM_MLPERF_INFERENCE_RESULTS_DIR }}"
40 changes: 37 additions & 3 deletions cm-mlops/script/get-ml-model-llama2/_cm.json
@@ -23,16 +23,14 @@
]
},
"env": {
"CM_GIT_CHECKOUT_FOLDER": "Llama-2-70b-chat-hf",
"CM_MODEL_ZOO_ENV_KEY": "LLAMA2"
},
"force_env_keys": [
"CM_GIT_CHECKOUT_FOLDER"
],
"names": [
"hf-zoo"
],
"tags": "get,ml-model,huggingface,zoo,_clone-repo,_model-stub.meta-llama/Llama-2-70b-chat-hf"
"tags": "get,ml-model,huggingface,zoo,_clone-repo"
}
],
"tags": [
@@ -46,6 +44,42 @@
],
"uid": "5db97be9f61244c6",
"variations": {
"meta-llama/Llama-2-70b-chat-hf": {
"group": "huggingface-stub",
"default": true,
"env": {
"CM_GIT_CHECKOUT_FOLDER": "Llama-2-70b-chat-hf",
"CM_MODEL_ZOO_ENV_KEY": "LLAMA2"
},
"adr": {
"hf-zoo": {
"tags": "_model-stub.meta-llama/Llama-2-70b-chat-hf"
}
}
},
"meta-llama/Llama-2-7b-chat-hf": {
"group": "huggingface-stub",
"env": {
"CM_GIT_CHECKOUT_FOLDER": "Llama-2-7b-chat-hf",
"CM_MODEL_ZOO_ENV_KEY": "LLAMA2"
},
"adr": {
"hf-zoo": {
"tags": "_model-stub.meta-llama/Llama-2-7b-chat-hf"
}
}
},
"stub.#": {
"group": "huggingface-stub",
"env": {
"CM_MODEL_ZOO_ENV_KEY": "LLAMA2"
},
"adr": {
"hf-zoo": {
"tags": "_model-stub.#"
}
}
},
"batch_size.#": {
"env": {
"CM_ML_MODEL_BATCH_SIZE": "#"
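A sketch of how one of the new `huggingface-stub` variations could be selected from Python, assuming the standard `cmind` package and its `access()` entry point; the tag string is taken from the variation names above.

```
import cmind

# Select the new Llama-2-7b-chat-hf stub variation of the get-ml-model-llama2 script.
r = cmind.access({'action': 'run',
                  'automation': 'script',
                  'tags': 'get,ml-model,llama2,_meta-llama/Llama-2-7b-chat-hf'})
if r['return'] > 0:
    print(r['error'])
```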
20 changes: 20 additions & 0 deletions cm-mlops/script/process-mlperf-accuracy/_cm.json
@@ -230,6 +230,26 @@
},
"group": "dataset"
},
"open-orca": {
"deps": [
{
"names": [
"openorca-dataset"
],
"tags": "get,dataset,openorca,preprocessed"
},
{
"names": [
"llama2-model"
],
"tags": "get,ml-model,llama2,_meta-llama/Llama-2-7b-chat-hf"
}
],
"env": {
"CM_DATASET": "openorca"
},
"group": "dataset"
},
"coco2014": {
"deps": [
{
5 changes: 5 additions & 0 deletions cm-mlops/script/process-mlperf-accuracy/customize.py
@@ -67,6 +67,11 @@ def preprocess(i):
"evaluation.py") + "' --mlperf-accuracy-file '" + os.path.join(result_dir, "mlperf_log_accuracy.json") + \
"' --dataset-file '" + env['CM_DATASET_EVAL_PATH'] + "'"+ " --dtype " + env.get('CM_ACCURACY_DTYPE', "float32") +" > '" + out_file + "'"

elif dataset == "openorca":
CMD = env['CM_PYTHON_BIN_WITH_PATH'] + " '" + os.path.join(env['CM_MLPERF_INFERENCE_SOURCE'], "language", "llama2-70b",
"evaluate-accuracy.py") + "' --checkpoint-path '" + env['CM_ML_MODEL_LLAMA2_FILE_WITH_PATH'] + "' --mlperf-accuracy-file '" + os.path.join(result_dir, "mlperf_log_accuracy.json") + \
"' --dataset-file '" + env['CM_DATASET_PREPROCESSED_PATH'] + "'"+ " --dtype " + env.get('CM_ACCURACY_DTYPE', "int32") +" > '" + out_file + "'"


elif dataset == "coco2014":
env['+PYTHONPATH'] = [ os.path.join(env['CM_MLPERF_INFERENCE_SOURCE'], "text_to_image", "tools") ]
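A rough sketch of the command shape the new `openorca` branch builds, with placeholder paths standing in for the env entries that the dependencies above populate:

```
import os

# Placeholder values; in the script these come from env set up by the deps.
inference_src = "/path/to/mlperf/inference"
checkpoint    = "/path/to/Llama-2-7b-chat-hf"
result_dir    = "/path/to/results"
dataset       = "/path/to/open_orca/preprocessed"

CMD = ("python3 '" + os.path.join(inference_src, "language", "llama2-70b", "evaluate-accuracy.py") +
       "' --checkpoint-path '" + checkpoint +
       "' --mlperf-accuracy-file '" + os.path.join(result_dir, "mlperf_log_accuracy.json") +
       "' --dataset-file '" + dataset + "' --dtype int32 > accuracy.txt")
print(CMD)
```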
30 changes: 25 additions & 5 deletions cm-mlops/script/reproduce-mlperf-inference-qualcomm/_cm.yaml
@@ -581,8 +581,12 @@ variations:
tags: _nsp.14

nsp.16:
group: nsp
base:
- pro
adr:
qaic-model-compiler:
tags: _nsp.14

nsp.#:
group: nsp
@@ -666,16 +670,36 @@ variations:
- tags: set,device,qaic,_vc.16

pro,num-devices.4,bert-99,offline:
default_variations:
loadgen-batch-size: loadgen-batch-size.4096
env:
qaic_activation_count: "16"
deps:
- tags: set,device,qaic,_vc.15

pro,num-devices.4,bert-99.9,offline:
default_variations:
loadgen-batch-size: loadgen-batch-size.4096
env:
qaic_activation_count: "8"
deps:
- tags: set,device,qaic,_vc.17
- tags: set,device,qaic,_vc.13

pro,num-devices.4,bert-99,server:
default_variations:
loadgen-batch-size: loadgen-batch-size.1024
env:
qaic_activation_count: "16"
deps:
- tags: set,device,qaic,_vc.13

pro,num-devices.4,bert-99.9,server:
default_variations:
loadgen-batch-size: loadgen-batch-size.1024
env:
qaic_activation_count: "8"
deps:
- tags: set,device,qaic,_vc.13

pro,num-devices.4,retinanet,offline:
default_variations:
@@ -691,10 +715,6 @@
env:
qaic_activation_count: "4"

pro,num-devices.4,bert-99.9,server:
env:
qaic_activation_count: "16"

pro,num-devices.4,retinanet,server:
default_variations:
batch-size: bs.1
9 changes: 6 additions & 3 deletions cm-mlops/script/run-mlperf-inference-app/_cm.yaml
@@ -182,6 +182,7 @@ variations:
tags: _full
env:
CM_MLPERF_SUBMISSION_GENERATION_STYLE: full
CM_MLPERF_SKIP_SUBMISSION_GENERATION: 'yes'
group: submission-generation-style

performance-only:
@@ -254,10 +255,12 @@ variations:
post_deps:
- names:
- submission-generator
skip_if_env:
enable_if_env:
CM_MLPERF_SKIP_SUBMISSION_GENERATION:
- 'yes'
- 'True'
- 'no'
- 'false'
- 'False'
- '0'
tags: generate,mlperf,inference,submission

versions:
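Roughly, the flipped condition behaves like the predicate below (a simplified sketch, not CM's actual `enable_if_env` implementation; handling of an unset key may differ):

```
# Simplified sketch of the enable_if_env check introduced above.
def enabled(env, key='CM_MLPERF_SKIP_SUBMISSION_GENERATION',
            values=('no', 'false', 'False', '0')):
    return str(env.get(key, '')) in values

print(enabled({'CM_MLPERF_SKIP_SUBMISSION_GENERATION': 'no'}))   # True  -> generator runs
print(enabled({'CM_MLPERF_SKIP_SUBMISSION_GENERATION': 'yes'}))  # False -> generator skipped
```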
@@ -14,7 +14,7 @@ def preprocess(i):
print("Please set CM_MLPERF_INFERENCE_SUBMISSION_DIR")
return {'return': 1, 'error':'CM_MLPERF_INFERENCE_SUBMISSION_DIR is not specified in env in run-mlperf-accuracy-log-truncator'}

submitter = env.get("CM_MLPERF_SUBMITTER", "cTuning")
submitter = env.get("CM_MLPERF_SUBMITTER", "CTuning")

os.system("rm -rf " + submission_dir + "_logs")

20 changes: 6 additions & 14 deletions docs/mlperf/inference/3d-unet/README_nvidia.md
@@ -1,9 +1,10 @@
[ [Back to index](README.md) ]

## Prepare Nvidia software

You need to install TensorRT and set up the configuration files as detailed [here](https://github.com/mlcommons/ck/blob/master/cm-mlops/script/reproduce-mlperf-inference-nvidia/README-about.md).
## Build Nvidia Docker Container (from 3.1 Inference round)

```
cm docker script --tags=build,nvidia,inference,server
```
## Run this benchmark via CM

*Note: from Feb 2024, we suggest you use [this GUI](https://access.cknowledge.org/playground/?action=howtorun&bench_uid=39877bb63fb54725)
@@ -27,22 +28,13 @@ cmr "generate-run-cmds inference _find-performance _all-scenarios" \
```
cmr "generate-run-cmds inference _submission _all-scenarios" --model=3d-unet-99 \
--device=cuda --implementation=nvidia-original --backend=tensorrt \
--execution-mode=valid --results_dir=$HOME/results_dir \
--category=edge --division=open --quiet
--execution-mode=valid --category=edge --division=open --quiet
```

* Use `--power=yes` for measuring power. It is ignored for accuracy and compliance runs
* Use `--division=closed` to run all scenarios for the closed division including the compliance tests
* `--offline_target_qps`, `--server_target_qps`, and `--singlestream_target_latency` can be used to override the determined performance numbers
* `--offline_target_qps` and `--singlestream_target_latency` can be used to override the determined performance numbers

### Populate the README files describing your submission

```
cmr "generate-run-cmds inference _populate-readme _all-scenarios" \
--model=3d-unet-99 --device=cuda --implementation=nvidia-original --backend=tensorrt \
--execution-mode=valid --results_dir=$HOME/results_dir \
--category=edge --division=open --quiet
```

### Generate and upload MLPerf submission

25 changes: 4 additions & 21 deletions docs/mlperf/inference/3d-unet/README_reference.md
@@ -25,34 +25,17 @@ cmr "generate-run-cmds inference _find-performance _all-scenarios" \
```
cmr "generate-run-cmds inference _submission _all-scenarios" --model=3d-unet-99.9 \
--device=cpu --implementation=reference --backend=onnxruntime \
--execution-mode=valid --results_dir=$HOME/inference_3.1_results \
--category=edge --division=open --quiet
--execution-mode=valid --category=edge --division=open --quiet
```

* Use `--power=yes` for measuring power. It is ignored for accuracy and compliance runs
* Use `--division=closed` to run all scenarios for the closed division including the compliance tests
* `--offline_target_qps`, `--server_target_qps` and `--singlestream_target_latency` can be used to override the determined performance numbers

### Populate the README files describing your submission

```
cmr "generate-run-cmds inference _populate-readme _all-scenarios" \
--model=3d-unet-99.9 --device=cpu --implementation=reference --backend=onnxruntime \
--execution-mode=valid --results_dir=$HOME/inference_3.1_results \
--category=edge --division=open --quiet
```
* `--offline_target_qps` and `--singlestream_target_latency` can be used to override the determined performance numbers

### Generate actual submission tree
### Generate and upload MLPerf submission

Here, we are copying the performance and accuracy log files (compliance logs also in the case of closed division) from the results directory to the submission tree following the [directory structure required by MLCommons Inference](https://github.com/mlcommons/policies/blob/master/submission_rules.adoc#inference-1). After the submission tree is generated, [accuracy truncate script](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/truncate-mlperf-inference-accuracy-log) is called to truncate accuracy logs and then the [submission checker](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/run-mlperf-inference-submission-checker) is called to validate the generated submission tree.
Follow [this guide](../Submission.md) to generate the submission tree and upload your results.

We should use the master branch of the MLCommons inference repo for the submission checker. You can use the `--hw_notes_extra` option to add your name to the notes.
```
cmr "generate inference submission" --results_dir=$HOME/inference_3.1_results/valid_results \
--submission_dir=$HOME/inference_submission_tree --clean \
--run-checker --submitter=cTuning --adr.inference-src.version=master \
--hw_notes_extra="Result taken by NAME" --quiet
```

### Questions? Suggestions?

10 changes: 4 additions & 6 deletions docs/mlperf/inference/bert/README_nvidia.md
@@ -11,12 +11,10 @@ cm docker script --tags=build,nvidia,inference,server
### Do a test run to detect and record the system performance

```
cmr "generate-run-cmds inference _find-performance _all-scenarios" \
cmr "generate-run-cmds inference _find-performance" --scenario=Offline \
--model=bert-99 --implementation=nvidia-original --device=cuda --backend=tensorrt \
--category=edge --division=open --quiet
```
* Use `--division=closed` to run all scenarios for the closed division (compliance tests are skipped for `_find-performance` mode)
* Use `--category=datacenter` to run datacenter scenarios (only for bert-99.9)
* Use `--model=bert-99.9` to run the high-accuracy model (only for datacenter)
* Use `--rerun` to force a rerun even when result files (from a previous run) exist

@@ -25,13 +23,13 @@ cmr "generate-run-cmds inference _find-performance _all-scenarios" \
```
cmr "generate-run-cmds inference _submission _all-scenarios" --model=bert-99 \
--device=cuda --implementation=nvidia-original --backend=tensorrt \
--execution-mode=valid --results_dir=$HOME/results_dir \
--category=edge --division=open --quiet
--execution-mode=valid --category=edge --division=open --quiet
```

* Use `--category=datacenter` to run datacenter scenarios (only for bert-99.9)
* Use `--power=yes` for measuring power. It is ignored for accuracy and compliance runs
* Use `--division=closed` to run all scenarios for the closed division including the compliance tests
* `--offline_target_qps`, `--server_target_qps`, and `--singlestream_target_latency` can be used to override the determined performance numbers
* `--offline_target_qps`, `--server_target_qps`, and `--singlestream_target_latency` can be used to pass in the performance numbers


### Generate and upload MLPerf submission
2 changes: 1 addition & 1 deletion docs/mlperf/inference/dlrm_v2/README_nvidia.md
@@ -3,7 +3,7 @@
## Build Nvidia Docker Container (from 3.1 Inference round)

```
cm docker script --tags=build,nvidia,inference,server
cm docker script --tags=build,nvidia,inference,server --dlrm_data_path=<Path to dlrm data>
```
## Run this benchmark via CM
