Merge from CTuning (#1132)
gfursin authored Feb 26, 2024
2 parents 1902a28 + 2784d26 commit ce707e2
Showing 21 changed files with 151 additions and 123 deletions.
5 changes: 5 additions & 0 deletions cm-mlops/script/app-mlperf-inference-reference/_cm.yaml
@@ -905,6 +905,10 @@ variations:
base:
- llama2-70b_

llama2-70b_,cuda:
default_env:
CM_MLPERF_LOADGEN_BATCH_SIZE: 8

llama2-70b-99.9:
group: models
env:
@@ -1113,6 +1117,7 @@ variations:
alias: int8

batch_size.#:
group: batch-size
env:
CM_MLPERF_LOADGEN_MAX_BATCHSIZE: "#"
add_deps_recursive:
8 changes: 6 additions & 2 deletions cm-mlops/script/app-mlperf-inference-reference/customize.py
@@ -54,8 +54,11 @@ def preprocess(i):
else:
env['CM_NUM_THREADS'] = env.get('CM_HOST_CPU_TOTAL_CORES', '1')

if env.get('CM_MLPERF_LOADGEN_MAX_BATCHSIZE','') != '' and not env.get('CM_MLPERF_MODEL_SKIP_BATCHING', False) :
env['CM_MLPERF_LOADGEN_EXTRA_OPTIONS'] += " --max-batchsize " + env['CM_MLPERF_LOADGEN_MAX_BATCHSIZE']
if env.get('CM_MLPERF_LOADGEN_MAX_BATCHSIZE','') != '' and not env.get('CM_MLPERF_MODEL_SKIP_BATCHING', False):
env['CM_MLPERF_LOADGEN_EXTRA_OPTIONS'] += " --max-batchsize " + str(env['CM_MLPERF_LOADGEN_MAX_BATCHSIZE'])

if env.get('CM_MLPERF_LOADGEN_BATCH_SIZE','') != '':
env['CM_MLPERF_LOADGEN_EXTRA_OPTIONS'] += " --batch-size " + str(env['CM_MLPERF_LOADGEN_BATCH_SIZE'])

if env.get('CM_MLPERF_LOADGEN_QUERY_COUNT','') != '' and not env.get('CM_TMP_IGNORE_MLPERF_QUERY_COUNT', False) and (env['CM_MLPERF_LOADGEN_MODE'] == 'accuracy' or 'gptj' in env['CM_MODEL']) and env.get('CM_MLPERF_RUN_STYLE','') != "valid":
env['CM_MLPERF_LOADGEN_EXTRA_OPTIONS'] += " --count " + env['CM_MLPERF_LOADGEN_QUERY_COUNT']
@@ -297,6 +300,7 @@ def get_run_cmd_reference(os_info, env, scenario_extra_options, mode_extra_optio
" --output-log-dir " + env['CM_MLPERF_OUTPUT_DIR'] + \
' --dtype ' + env['CM_MLPERF_MODEL_PRECISION'] + \
" --model-path " + env['MODEL_DIR']
cmd = cmd.replace("--count", "--total-sample-count")
elif "3d-unet" in env['CM_MODEL']:

env['RUN_DIR'] = env['CM_MLPERF_INFERENCE_3DUNET_PATH']
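Below is a minimal standalone sketch (not part of this commit) of how the two batch-size variables now compose into the loadgen options; the `build_loadgen_options` helper and the example values are hypothetical.

```
# Hypothetical helper illustrating the flag composition shown in the diff above.
def build_loadgen_options(env):
    opts = env.get('CM_MLPERF_LOADGEN_EXTRA_OPTIONS', '')

    # --max-batchsize comes from CM_MLPERF_LOADGEN_MAX_BATCHSIZE unless batching is skipped
    if env.get('CM_MLPERF_LOADGEN_MAX_BATCHSIZE', '') != '' and not env.get('CM_MLPERF_MODEL_SKIP_BATCHING', False):
        opts += " --max-batchsize " + str(env['CM_MLPERF_LOADGEN_MAX_BATCHSIZE'])

    # --batch-size comes from the new CM_MLPERF_LOADGEN_BATCH_SIZE default
    # (set to 8 by the llama2-70b_,cuda variation above)
    if env.get('CM_MLPERF_LOADGEN_BATCH_SIZE', '') != '':
        opts += " --batch-size " + str(env['CM_MLPERF_LOADGEN_BATCH_SIZE'])

    return opts

print(build_loadgen_options({'CM_MLPERF_LOADGEN_BATCH_SIZE': 8}))  # " --batch-size 8"
```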
3 changes: 1 addition & 2 deletions cm-mlops/script/app-mlperf-inference/_cm.yaml
@@ -459,11 +459,10 @@ variations:
skip_if_env:
CM_MLPERF_IMPLEMENTATION:
- nvidia-original
- reference
names:
- mlperf-accuracy-script
- open-orca-accuracy-script
tags: run,accuracy,mlperf,_open-orca
tags: run,accuracy,mlperf,_open-orca,_int32

llama2-70b-99:
group:
2 changes: 1 addition & 1 deletion cm-mlops/script/app-mlperf-inference/customize.py
@@ -224,7 +224,7 @@ def postprocess(i):
state['cm-mlperf-inference-results'][state['CM_SUT_CONFIG_NAME']][model][scenario] = {}

state['cm-mlperf-inference-results'][state['CM_SUT_CONFIG_NAME']][model][scenario][mode] = result
state['cm-mlperf-inference-results'][state['CM_SUT_CONFIG_NAME']][model][scenario][mode+'_valid'] = valid[mode]
state['cm-mlperf-inference-results'][state['CM_SUT_CONFIG_NAME']][model][scenario][mode+'_valid'] = valid.get(mode, False)

state['cm-mlperf-inference-results-last'][mode] = result
state['cm-mlperf-inference-results-last'][mode+'_valid'] = valid[mode]
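A tiny illustration of why the switch from `valid[mode]` to `valid.get(mode, False)` matters (assumed dictionary contents, not from this commit):

```
valid = {'performance': True}   # assume 'accuracy' was never recorded for this run

mode = 'accuracy'
# valid[mode]                   # would raise KeyError
print(valid.get(mode, False))   # prints False, so postprocess can continue
```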
8 changes: 3 additions & 5 deletions cm-mlops/script/build-mlperf-inference-server-nvidia/_cm.yaml
@@ -140,9 +140,6 @@ post_deps:
- "no"
- False
- "False"
enable_if_env:
CM_RUN_STATE_DOCKER:
- False

variations:
# Target devices
@@ -236,9 +233,10 @@ docker:
- tags: get,mlperf,inference,nvidia,scratch,space
- tags: get,mlperf,inference,results,dir
- tags: get,mlperf,inference,submission,dir
- tags: get,nvidia-docker
pre_run_cmds:
- cm pull repo ctuning@mlcommons-ck
run_cmd_prefix: sudo apt remove -y cmake && cm pull repo ctuning@mlcommons-ck
- cm pull repo mlcommons@ck
run_cmd_prefix: sudo apt remove -y cmake && cm pull repo mlcommons@ck
mounts:
- "${{ IMAGENET_PATH }}:/data/imagenet-val"
- "${{ CM_MLPERF_INFERENCE_RESULTS_DIR }}:${{ CM_MLPERF_INFERENCE_RESULTS_DIR }}"
40 changes: 37 additions & 3 deletions cm-mlops/script/get-ml-model-llama2/_cm.json
@@ -23,16 +23,14 @@
]
},
"env": {
"CM_GIT_CHECKOUT_FOLDER": "Llama-2-70b-chat-hf",
"CM_MODEL_ZOO_ENV_KEY": "LLAMA2"
},
"force_env_keys": [
"CM_GIT_CHECKOUT_FOLDER"
],
"names": [
"hf-zoo"
],
"tags": "get,ml-model,huggingface,zoo,_clone-repo,_model-stub.meta-llama/Llama-2-70b-chat-hf"
"tags": "get,ml-model,huggingface,zoo,_clone-repo"
}
],
"tags": [
@@ -46,6 +44,42 @@
],
"uid": "5db97be9f61244c6",
"variations": {
"meta-llama/Llama-2-70b-chat-hf": {
"group": "huggingface-stub",
"default": true,
"env": {
"CM_GIT_CHECKOUT_FOLDER": "Llama-2-70b-chat-hf",
"CM_MODEL_ZOO_ENV_KEY": "LLAMA2"
},
"adr": {
"hf-zoo": {
"tags": "_model-stub.meta-llama/Llama-2-70b-chat-hf"
}
}
},
"meta-llama/Llama-2-7b-chat-hf": {
"group": "huggingface-stub",
"env": {
"CM_GIT_CHECKOUT_FOLDER": "Llama-2-7b-chat-hf",
"CM_MODEL_ZOO_ENV_KEY": "LLAMA2"
},
"adr": {
"hf-zoo": {
"tags": "_model-stub.meta-llama/Llama-2-7b-chat-hf"
}
}
},
"stub.#": {
"group": "huggingface-stub",
"env": {
"CM_MODEL_ZOO_ENV_KEY": "LLAMA2"
},
"adr": {
"hf-zoo": {
"tags": "_model-stub.#"
}
}
},
"batch_size.#": {
"env": {
"CM_ML_MODEL_BATCH_SIZE": "#"
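A sketch of how one of the new `huggingface-stub` variations could be selected from Python, assuming the standard `cmind` package and its `access()` entry point; the tag string is taken from the variation names above.

```
import cmind

# Select the new Llama-2-7b-chat-hf stub variation of the get-ml-model-llama2 script.
r = cmind.access({'action': 'run',
                  'automation': 'script',
                  'tags': 'get,ml-model,llama2,_meta-llama/Llama-2-7b-chat-hf'})
if r['return'] > 0:
    print(r['error'])
```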
20 changes: 20 additions & 0 deletions cm-mlops/script/process-mlperf-accuracy/_cm.json
@@ -230,6 +230,26 @@
},
"group": "dataset"
},
"open-orca": {
"deps": [
{
"names": [
"openorca-dataset"
],
"tags": "get,dataset,openorca,preprocessed"
},
{
"names": [
"llama2-model"
],
"tags": "get,ml-model,llama2,_meta-llama/Llama-2-7b-chat-hf"
}
],
"env": {
"CM_DATASET": "openorca"
},
"group": "dataset"
},
"coco2014": {
"deps": [
{
5 changes: 5 additions & 0 deletions cm-mlops/script/process-mlperf-accuracy/customize.py
@@ -67,6 +67,11 @@ def preprocess(i):
"evaluation.py") + "' --mlperf-accuracy-file '" + os.path.join(result_dir, "mlperf_log_accuracy.json") + \
"' --dataset-file '" + env['CM_DATASET_EVAL_PATH'] + "'"+ " --dtype " + env.get('CM_ACCURACY_DTYPE', "float32") +" > '" + out_file + "'"

elif dataset == "openorca":
CMD = env['CM_PYTHON_BIN_WITH_PATH'] + " '" + os.path.join(env['CM_MLPERF_INFERENCE_SOURCE'], "language", "llama2-70b",
"evaluate-accuracy.py") + "' --checkpoint-path '" + env['CM_ML_MODEL_LLAMA2_FILE_WITH_PATH'] + "' --mlperf-accuracy-file '" + os.path.join(result_dir, "mlperf_log_accuracy.json") + \
"' --dataset-file '" + env['CM_DATASET_PREPROCESSED_PATH'] + "'"+ " --dtype " + env.get('CM_ACCURACY_DTYPE', "int32") +" > '" + out_file + "'"


elif dataset == "coco2014":
env['+PYTHONPATH'] = [ os.path.join(env['CM_MLPERF_INFERENCE_SOURCE'], "text_to_image", "tools") ]
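A rough sketch of the command shape the new `openorca` branch builds, with placeholder paths standing in for the env entries that the dependencies above populate:

```
import os

# Placeholder values; in the script these come from env set up by the deps.
inference_src = "/path/to/mlperf/inference"
checkpoint    = "/path/to/Llama-2-7b-chat-hf"
result_dir    = "/path/to/results"
dataset       = "/path/to/open_orca/preprocessed"

CMD = ("python3 '" + os.path.join(inference_src, "language", "llama2-70b", "evaluate-accuracy.py") +
       "' --checkpoint-path '" + checkpoint +
       "' --mlperf-accuracy-file '" + os.path.join(result_dir, "mlperf_log_accuracy.json") +
       "' --dataset-file '" + dataset + "' --dtype int32 > accuracy.txt")
print(CMD)
```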
30 changes: 25 additions & 5 deletions cm-mlops/script/reproduce-mlperf-inference-qualcomm/_cm.yaml
@@ -581,8 +581,12 @@ variations:
tags: _nsp.14

nsp.16:
group: nsp
base:
- pro
adr:
qaic-model-compiler:
tags: _nsp.14

nsp.#:
group: nsp
@@ -666,16 +670,36 @@ variations:
- tags: set,device,qaic,_vc.16

pro,num-devices.4,bert-99,offline:
default_variations:
loadgen-batch-size: loadgen-batch-size.4096
env:
qaic_activation_count: "16"
deps:
- tags: set,device,qaic,_vc.15

pro,num-devices.4,bert-99.9,offline:
default_variations:
loadgen-batch-size: loadgen-batch-size.4096
env:
qaic_activation_count: "8"
deps:
- tags: set,device,qaic,_vc.17
- tags: set,device,qaic,_vc.13

pro,num-devices.4,bert-99,server:
default_variations:
loadgen-batch-size: loadgen-batch-size.1024
env:
qaic_activation_count: "16"
deps:
- tags: set,device,qaic,_vc.13

pro,num-devices.4,bert-99.9,server:
default_variations:
loadgen-batch-size: loadgen-batch-size.1024
env:
qaic_activation_count: "8"
deps:
- tags: set,device,qaic,_vc.13

pro,num-devices.4,retinanet,offline:
default_variations:
@@ -691,10 +715,6 @@
env:
qaic_activation_count: "4"

pro,num-devices.4,bert-99.9,server:
env:
qaic_activation_count: "16"

pro,num-devices.4,retinanet,server:
default_variations:
batch-size: bs.1
9 changes: 6 additions & 3 deletions cm-mlops/script/run-mlperf-inference-app/_cm.yaml
@@ -182,6 +182,7 @@ variations:
tags: _full
env:
CM_MLPERF_SUBMISSION_GENERATION_STYLE: full
CM_MLPERF_SKIP_SUBMISSION_GENERATION: 'yes'
group: submission-generation-style

performance-only:
@@ -254,10 +255,12 @@ variations:
post_deps:
- names:
- submission-generator
skip_if_env:
enable_if_env:
CM_MLPERF_SKIP_SUBMISSION_GENERATION:
- 'yes'
- 'True'
- 'no'
- 'false'
- 'False'
- '0'
tags: generate,mlperf,inference,submission

versions:
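Roughly, the flipped condition behaves like the predicate below (a simplified sketch, not CM's actual `enable_if_env` implementation; handling of an unset key may differ):

```
# Simplified sketch of the enable_if_env check introduced above.
def enabled(env, key='CM_MLPERF_SKIP_SUBMISSION_GENERATION',
            values=('no', 'false', 'False', '0')):
    return str(env.get(key, '')) in values

print(enabled({'CM_MLPERF_SKIP_SUBMISSION_GENERATION': 'no'}))   # True  -> generator runs
print(enabled({'CM_MLPERF_SKIP_SUBMISSION_GENERATION': 'yes'}))  # False -> generator skipped
```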
@@ -14,7 +14,7 @@ def preprocess(i):
print("Please set CM_MLPERF_INFERENCE_SUBMISSION_DIR")
return {'return': 1, 'error':'CM_MLPERF_INFERENCE_SUBMISSION_DIR is not specified in env in run-mlperf-accuracy-log-truncator'}

submitter = env.get("CM_MLPERF_SUBMITTER", "cTuning")
submitter = env.get("CM_MLPERF_SUBMITTER", "CTuning")

os.system("rm -rf " + submission_dir + "_logs")

20 changes: 6 additions & 14 deletions docs/mlperf/inference/3d-unet/README_nvidia.md
@@ -1,9 +1,10 @@
[ [Back to index](README.md) ]

## Prepare Nvidia software

You need to install TensorRT and set up the configuration files as detailed [here](https://github.com/mlcommons/ck/blob/master/cm-mlops/script/reproduce-mlperf-inference-nvidia/README-about.md).
## Build Nvidia Docker Container (from 3.1 Inference round)

```
cm docker script --tags=build,nvidia,inference,server
```
## Run this benchmark via CM

*Note: from Feb 2024, we suggest you use [this GUI](https://access.cknowledge.org/playground/?action=howtorun&bench_uid=39877bb63fb54725)
@@ -27,22 +28,13 @@ cmr "generate-run-cmds inference _find-performance _all-scenarios" \
```
cmr "generate-run-cmds inference _submission _all-scenarios" --model=3d-unet-99 \
--device=cuda --implementation=nvidia-original --backend=tensorrt \
--execution-mode=valid --results_dir=$HOME/results_dir \
--category=edge --division=open --quiet
--execution-mode=valid --category=edge --division=open --quiet
```

* Use `--power=yes` for measuring power. It is ignored for accuracy and compliance runs
* Use `--division=closed` to run all scenarios for the closed division including the compliance tests
* `--offline_target_qps`, `--server_target_qps`, and `--singlestream_target_latency` can be used to override the determined performance numbers
* `--offline_target_qps` and `--singlestream_target_latency` can be used to override the determined performance numbers

### Populate the README files describing your submission

```
cmr "generate-run-cmds inference _populate-readme _all-scenarios" \
--model=3d-unet-99 --device=cuda --implementation=nvidia-original --backend=tensorrt \
--execution-mode=valid --results_dir=$HOME/results_dir \
--category=edge --division=open --quiet
```

### Generate and upload MLPerf submission

25 changes: 4 additions & 21 deletions docs/mlperf/inference/3d-unet/README_reference.md
@@ -25,34 +25,17 @@ cmr "generate-run-cmds inference _find-performance _all-scenarios" \
```
cmr "generate-run-cmds inference _submission _all-scenarios" --model=3d-unet-99.9 \
--device=cpu --implementation=reference --backend=onnxruntime \
--execution-mode=valid --results_dir=$HOME/inference_3.1_results \
--category=edge --division=open --quiet
--execution-mode=valid --category=edge --division=open --quiet
```

* Use `--power=yes` for measuring power. It is ignored for accuracy and compliance runs
* Use `--division=closed` to run all scenarios for the closed division including the compliance tests
* `--offline_target_qps`, `--server_target_qps` and `--singlestream_target_latency` can be used to override the determined performance numbers

### Populate the README files describing your submission

```
cmr "generate-run-cmds inference _populate-readme _all-scenarios" \
--model=3d-unet-99.9 --device=cpu --implementation=reference --backend=onnxruntime \
--execution-mode=valid --results_dir=$HOME/inference_3.1_results \
--category=edge --division=open --quiet
```
* `--offline_target_qps` and `--singlestream_target_latency` can be used to override the determined performance numbers

### Generate actual submission tree
### Generate and upload MLPerf submission

Here, we are copying the performance and accuracy log files (compliance logs also in the case of closed division) from the results directory to the submission tree following the [directory structure required by MLCommons Inference](https://github.com/mlcommons/policies/blob/master/submission_rules.adoc#inference-1). After the submission tree is generated, [accuracy truncate script](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/truncate-mlperf-inference-accuracy-log) is called to truncate accuracy logs and then the [submission checker](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/run-mlperf-inference-submission-checker) is called to validate the generated submission tree.
Follow [this guide](../Submission.md) to generate the submission tree and upload your results.

We should use the master branch of the MLCommons inference repo for the submission checker. You can use the `--hw_notes_extra` option to add your name to the notes.
```
cmr "generate inference submission" --results_dir=$HOME/inference_3.1_results/valid_results \
--submission_dir=$HOME/inference_submission_tree --clean \
--run-checker --submitter=cTuning --adr.inference-src.version=master \
--hw_notes_extra="Result taken by NAME" --quiet
```

### Questions? Suggestions?

10 changes: 4 additions & 6 deletions docs/mlperf/inference/bert/README_nvidia.md
@@ -11,12 +11,10 @@ cm docker script --tags=build,nvidia,inference,server
### Do a test run to detect and record the system performance

```
cmr "generate-run-cmds inference _find-performance _all-scenarios" \
cmr "generate-run-cmds inference _find-performance" --scenario=Offline \
--model=bert-99 --implementation=nvidia-original --device=cuda --backend=tensorrt \
--category=edge --division=open --quiet
```
* Use `--division=closed` to run all scenarios for the closed division (compliance tests are skipped for `_find-performance` mode)
* Use `--category=datacenter` to run datacenter scenarios (only for bert-99.9)
* Use `--model=bert-99.9` to run the high-accuracy model (only for datacenter)
* Use `--rerun` to force a rerun even when result files (from a previous run) exist

@@ -25,13 +23,13 @@ cmr "generate-run-cmds inference _find-performance _all-scenarios" \
```
cmr "generate-run-cmds inference _submission _all-scenarios" --model=bert-99 \
--device=cuda --implementation=nvidia-original --backend=tensorrt \
--execution-mode=valid --results_dir=$HOME/results_dir \
--category=edge --division=open --quiet
--execution-mode=valid --category=edge --division=open --quiet
```

* Use `--category=datacenter` to run datacenter scenarios (only for bert-99.9)
* Use `--power=yes` for measuring power. It is ignored for accuracy and compliance runs
* Use `--division=closed` to run all scenarios for the closed division including the compliance tests
* `--offline_target_qps`, `--server_target_qps`, and `--singlestream_target_latency` can be used to override the determined performance numbers
* `--offline_target_qps`, `--server_target_qps`, and `--singlestream_target_latency` can be used to pass in the performance numbers


### Generate and upload MLPerf submission
2 changes: 1 addition & 1 deletion docs/mlperf/inference/dlrm_v2/README_nvidia.md
@@ -3,7 +3,7 @@
## Build Nvidia Docker Container (from 3.1 Inference round)

```
cm docker script --tags=build,nvidia,inference,server
cm docker script --tags=build,nvidia,inference,server --dlrm_data_path=<Path to dlrm data>
```
## Run this benchmark via CM
