updating MLPerf inference docs #1115

Merged: 11 commits, Feb 19, 2024
2 changes: 1 addition & 1 deletion README.md
@@ -184,7 +184,7 @@ and how to implement and share new automations in your public or private project

* ACM REP'23 keynote about MLCommons CM: [ [slides](https://doi.org/10.5281/zenodo.8105339) ] [ [YouTube](https://youtu.be/_1f9i_Bzjmg) ]
* ACM TechTalk'21 about automating research projects: [ [YouTube](https://www.youtube.com/watch?v=7zpeIVwICa4) ] [ [slides](https://learning.acm.org/binaries/content/assets/leaning-center/webinar-slides/2021/grigorifursin_techtalk_slides.pdf) ]
* MLPerf inference submitter orientation: [ [v4.0 slides](https://doi.org/10.5281/zenodo.10605079) ] [ [v3.1 slides](https://doi.org/10.5281/zenodo.8144274) ]
* MLPerf inference submitter orientation: [ [v4.0 slides]( https://doi.org/10.5281/zenodo.10605079 ) ] [ [v3.1 slides](https://doi.org/10.5281/zenodo.8144274) ]

### Get in touch

5 changes: 4 additions & 1 deletion cm-mlops/script/app-mlperf-inference/customize.py
@@ -407,7 +407,7 @@ def postprocess(i):
OUTPUT_DIR = os.path.dirname(COMPLIANCE_DIR)

SCRIPT_PATH = os.path.join(env['CM_MLPERF_INFERENCE_SOURCE'], "compliance", "nvidia", test, "run_verification.py")
cmd = env['CM_PYTHON_BIN'] + " " + SCRIPT_PATH + " -r " + RESULT_DIR + " -c " + COMPLIANCE_DIR + " -o "+ OUTPUT_DIR
cmd = env['CM_PYTHON_BIN_WITH_PATH'] + " " + SCRIPT_PATH + " -r " + RESULT_DIR + " -c " + COMPLIANCE_DIR + " -o "+ OUTPUT_DIR
print(cmd)
os.system(cmd)
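
> Editor's note: the switch to `CM_PYTHON_BIN_WITH_PATH` matters when CM runs inside a virtual environment, where a bare interpreter name may resolve to a different Python. Below is a minimal, quoting-safe sketch of the same invocation, assuming the variables (`env`, `SCRIPT_PATH`, `RESULT_DIR`, `COMPLIANCE_DIR`, `OUTPUT_DIR`) and the `-r/-c/-o` flags of `run_verification.py` shown above; it is not the PR's code, which uses `os.system`.

```python
import subprocess

# Build the argument list explicitly instead of concatenating a shell string,
# so paths containing spaces do not break the call (illustrative sketch only).
verification_cmd = [
    env['CM_PYTHON_BIN_WITH_PATH'],   # full path to the Python interpreter used for MLPerf
    SCRIPT_PATH,                      # .../compliance/nvidia/<test>/run_verification.py
    "-r", RESULT_DIR,
    "-c", COMPLIANCE_DIR,
    "-o", OUTPUT_DIR,
]
print(" ".join(verification_cmd))
subprocess.run(verification_cmd, check=True)  # raises CalledProcessError on failure
```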

@@ -458,6 +458,9 @@ def postprocess(i):
env['CMD'] = CMD
r = automation.run_native_script({'run_script_input':run_script_input, 'env':env, 'script_name':'verify_accuracy'})
if r['return']>0: return r
import submission_checker as checker
is_valid = checker.check_compliance_perf_dir(COMPLIANCE_DIR)
state['cm-mlperf-inference-results'][state['CM_SUT_CONFIG_NAME']][model][scenario][test] = "passed" if is_valid else "failed"

else:
print(test)
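
> Editor's note: the nested key written above places the compliance verdict alongside the other per-test results in the CM state. A hypothetical shape is sketched below; the SUT, model and scenario names are made up for illustration and are not taken from this PR.

```python
# Hypothetical example of state['cm-mlperf-inference-results'] after a compliance run.
state = {
    'cm-mlperf-inference-results': {
        'my-sut-reference-gpu': {          # state['CM_SUT_CONFIG_NAME']
            'resnet50': {
                'Offline': {
                    'TEST01': 'passed',    # written by the line added above
                    'TEST05': 'failed',
                }
            }
        }
    }
}
```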
@@ -236,8 +236,6 @@ def generate_submission(i):
with open(os.path.join(result_mode_path, "system_meta.json"), "r") as f:
    saved_system_meta = json.load(f)
for key in list(saved_system_meta):
    print (saved_system_meta)
    print (saved_system_meta[key])
    if saved_system_meta[key]==None or str(saved_system_meta[key]).strip() == '':
        del(saved_system_meta[key])
system_meta = {**saved_system_meta, **system_meta} #override the saved meta with the user inputs
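
> Editor's note: the `{**saved_system_meta, **system_meta}` merge relies on later keys winning, which is what makes the user-provided fields override the saved metadata. A tiny self-contained illustration with made-up keys and values:

```python
# Later dict wins on duplicate keys, so user inputs override the saved metadata.
saved_system_meta = {"system_name": "saved-name", "operating_system": "Ubuntu 22.04"}  # hypothetical saved values
system_meta = {"system_name": "user-provided-name"}                                    # hypothetical user inputs

merged = {**saved_system_meta, **system_meta}
print(merged)  # {'system_name': 'user-provided-name', 'operating_system': 'Ubuntu 22.04'}
```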
@@ -310,7 +308,7 @@ def generate_submission(i):
f.write("TBD") #create an empty README
else:
readme_suffix = ""
result_string, result = mlperf_utils.get_result_string(env['CM_MLPERF_LAST_RELEASE'], model, scenario, result_scenario_path, power_run, sub_res)
result_string, result = mlperf_utils.get_result_string(env['CM_MLPERF_LAST_RELEASE'], model, scenario, result_scenario_path, power_run, sub_res, division, system_file)

for key in result:
results[model][scenario][key] = result[key]
46 changes: 44 additions & 2 deletions cm-mlops/script/get-mlperf-inference-utils/mlperf_utils.py
@@ -124,7 +124,7 @@ def get_accuracy_metric(config, model, path):

return is_valid, acc_results, acc_targets, acc_limits

def get_result_string(version, model, scenario, result_path, has_power, sub_res):
def get_result_string(version, model, scenario, result_path, has_power, sub_res, division="open", system_json=None):

config = checker.Config(
version,
@@ -167,6 +167,21 @@ def get_result_string(version, model, scenario, result_path, has_power, sub_res)
result['power'] = power_result
result['power_efficiency'] = power_efficiency_result

    compliance_list = [ "TEST01", "TEST05", "TEST04" ]
    if division == "closed":
        for test in compliance_list:
            test_path = os.path.join(result_path, test)
            if os.path.exists(test_path): # we don't consider missing test folders now - the submission checker will do that
                #test_pass = checker.check_compliance_dir(test_path, mlperf_model, scenario, config, "closed", system_json, sub_res)
                test_pass = checker.check_compliance_perf_dir(test_path)
                if test_pass and test in [ "TEST01", "TEST06" ]:
                    #test_pass = checker.check_compliance_acc_dir(test_path, mlperf_model, config)
                    pass # the accuracy truncation script runs after submission generation; we assume here that it will pass
                if test_pass:
                    result[test] = "passed"
                else:
                    result[test] = "failed"

acc_valid, acc_results, acc_targets, acc_limits = get_accuracy_metric(config, mlperf_model, accuracy_path)

result_field = checker.RESULT_FIELD[effective_scenario]
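
> Editor's note: with the extended signature, existing callers keep the open-division behaviour by default, while closed-division submissions can pass the division and a system description. The call below is an illustrative sketch only; every value is made up, and `mlperf_utils` is assumed to be importable as in the caller shown earlier.

```python
# Illustrative call with the new optional arguments (values are hypothetical).
result_string, result = mlperf_utils.get_result_string(
    version="v4.0",
    model="resnet50",
    scenario="Offline",
    result_path="/path/to/results/resnet50/Offline",
    has_power=False,
    sub_res="resnet50",
    division="closed",     # enables the TEST01/TEST05/TEST04 checks added above
    system_json=None,      # reserved for the commented-out full compliance check
)
# result may now also carry e.g. result["TEST01"] == "passed" or "failed"
```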
@@ -205,7 +220,7 @@ def get_result_string(version, model, scenario, result_path, has_power, sub_res)

def get_result_table(results):

headers = ["Model", "Scenario", "Accuracy", "QPS", "Latency (in ms)", "Power Efficiency (in samples/J)"]
headers = ["Model", "Scenario", "Accuracy", "QPS", "Latency (in ms)", "Power Efficiency (in samples/J)", "TEST01", "TEST05", "TEST04"]
table = []
for model in results:
for scenario in results[model]:
@@ -253,6 +268,33 @@ def get_result_table(results):
                if not results[model][scenario].get('power_valid', True):
                    val = "X "+val
                row.append(val)
            else:
                row.append(None)

            val1 = results[model][scenario].get('TEST01')
            val2 = results[model][scenario].get('TEST05')
            val3 = results[model][scenario].get('TEST04')
            if val1:
                row.append(val1)
                if val2:
                    row.append(val2)
                    if val3:
                        row.append(val3)
                elif val3:
                    row.append("missing")
                    row.append(val3)
            else:
                if val2:
                    row.append("missing")
                    row.append(val2)
                    if val3:
                        row.append(val3)
                elif val3:
                    row.append("missing")
                    row.append("missing")
                    row.append(val3)

            table.append(row)

return table, headers
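
> Editor's note: the nested `val1`/`val2`/`val3` block above keeps the three compliance columns aligned by padding earlier gaps with "missing". An equivalent, more compact sketch (not the PR's code) that always emits one cell per test could look like this:

```python
def compliance_cells(scenario_result, tests=("TEST01", "TEST05", "TEST04")):
    """One table cell per compliance test; absent tests become "missing".

    Alternative sketch only: unlike the PR's nested block, it also pads
    when no compliance result is present at all (e.g. open division),
    so every row has the same length.
    """
    return [scenario_result.get(test, "missing") for test in tests]

# Hypothetical example:
print(compliance_cells({"TEST01": "passed", "TEST04": "failed"}))
# ['passed', 'missing', 'failed']
```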
2 changes: 1 addition & 1 deletion cm-mlops/script/run-mlperf-inference-app/_cm.yaml
@@ -415,7 +415,7 @@ input_description:
sort: 5005
adr.mlperf-power-client.port:
default: 4950
desc: MLPerf Power client port
desc: MLPerf Power server port
sort: 5010
clean:
boolean: true
2 changes: 1 addition & 1 deletion cm-mlops/script/run-mlperf-inference-app/customize.py
@@ -189,7 +189,7 @@ def preprocess(i):
return r

if state.get("cm-mlperf-inference-results"):
# print(state["cm-mlperf-inference-results"])
#print(state["cm-mlperf-inference-results"])
for sut in state["cm-mlperf-inference-results"]:#only one sut will be there
# Grigori: that may not work properly since customize may have another Python than MLPerf
# (for example, if we use virtual env)
8 changes: 8 additions & 0 deletions cm/CHANGES.md
@@ -1,3 +1,11 @@
## V2.0.0
- a major update with the new CM automation recipes
and GUI to compose modular AI systems and optimize
them across diverse models, datasets, software and hardware:
* https://access.cknowledge.org/playground/?action=scripts
* https://access.cknowledge.org/playground/?action=howtorun
* https://access.cknowledge.org/playground/?action=reproduce

## V1.6.2
- improved --help for common automations and CM scripts (automation recipes)
- fixed a few minor bugs
2 changes: 1 addition & 1 deletion cm/cmind/__init__.py
@@ -1,4 +1,4 @@
__version__ = "1.6.2.1"
__version__ = "2.0.0"

from cmind.core import access
from cmind.core import error
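
> Editor's note: a quick sanity check of the version bump, assuming the `cmind` package whose `__init__.py` is shown above is installed; after this PR it should report "2.0.0".

```python
# Print the installed CM (cmind) version.
import cmind
print(cmind.__version__)  # expected: 2.0.0
```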
2 changes: 1 addition & 1 deletion cmr.yaml
@@ -5,4 +5,4 @@ git: true

prefix: cm-mlops

version: 1.6.2.1
version: 2.0.0
5 changes: 3 additions & 2 deletions docs/installation.md
@@ -224,10 +224,11 @@ cm pull repo --url={URL of the fork of github.com/mlcommons/ck}
```

If you want to use stable CM snapshots of reusable automation recipes (CM scripts),
you can download a stable repository from Zenodo:
you can download a stable repository from Zenodo (~5MB):
```bash
cm rm repo mlcommons@ck --all
cm pull repo --url=https://zenodo.org/records/10576423/files/cm-mlops-repo.zip
cm pull repo --url=https://zenodo.org/records/10679842/files/cm-mlops-repo-20240219.zip

```

If you use CM scripts with Python outside containers, we suggest you set up a CM Python virtual
2 changes: 1 addition & 1 deletion docs/mlperf/README.md
@@ -2,7 +2,7 @@

# Run and customize MLPerf benchmarks using the MLCommons CM automation framework

This documentation explains how to run, customize and extend MLPerf benchmarks
This documentation explains how to compose, run, customize and extend MLPerf benchmarks
in a unified way across diverse models, data sets, software and hardware from different vendors
using [MLCommons Collective Mind automation recipes](https://access.cknowledge.org/playground/?action=scripts):

24 changes: 16 additions & 8 deletions docs/mlperf/inference/3d-unet/README.md
@@ -1,6 +1,8 @@
[ [Back to MLPerf inference benchmarks index](../README.md) ]

## MLPerf inference: medical imaging with 3D U-Net
# MLPerf inference benchmark

## Medical imaging with 3D U-Net

### Notes

@@ -9,16 +11,22 @@ with respect to the reference floating point model. Both models can be submitted

Please check [MLPerf inference GitHub](https://github.com/mlcommons/inference) for more details.

### Install CM
### Run using the [MLCommons CM framework](https://github.com/mlcommons/ck)

*From Feb 2024, we suggest you use [this GUI](https://access.cknowledge.org/playground/?action=howtorun&bench_uid=39877bb63fb54725)
to configure the MLPerf inference benchmark, generate CM commands to run it across different implementations, models, data sets, software
and hardware, and prepare your submissions.*

Please follow this [guide](../README.md#install-cm-automation-language)
to install the [MLCommons CM automation language](https://doi.org/10.5281/zenodo.8105339),
pull the repository with the CM automation recipes for MLPerf and
set up virtual environment to run MLPerf benchmarks.
### A few ready-to-use CM commands

### Run MLPerf via CM
Install the MLCommons CM automation framework with automation recipes for MLPerf as described [here](../../../installation.md).

The following guides explain how to run different implementations of this benchmark via CM:

* [MLCommons Reference implementation in Python](README_reference.md)
* [NVIDIA implementation](README_nvidia.md)
* [NVIDIA optimized implementation (GPU)](README_nvidia.md)

### Questions? Suggestions?

Check the [MLCommons Task Force on Automation and Reproducibility](../../../taskforce.md)
and get in touch via [public Discord server](https://discord.gg/JjWNWXKxwT).
15 changes: 8 additions & 7 deletions docs/mlperf/inference/3d-unet/README_nvidia.md
@@ -1,11 +1,16 @@
[ [Back to the common setup](README.md) ]
[ [Back to index](README.md) ]

## Prepare Nvidia software

You need to install TensorRT and set up the configuration files as detailed [here](https://github.com/mlcommons/ck/blob/master/cm-mlops/script/reproduce-mlperf-inference-nvidia/README-about.md).

## Run this benchmark via CM

*Note: from Feb 2024, we suggest you use [this GUI](https://access.cknowledge.org/playground/?action=howtorun&bench_uid=39877bb63fb54725)
to configure the MLPerf inference benchmark, generate CM commands to run it across different implementations, models, data sets, software
and hardware, and prepare your submissions.*


### Do a test run to detect and record the system performance

```
@@ -43,14 +48,10 @@ cmr "generate-run-cmds inference _populate-readme _all-scenarios" \

Follow [this guide](../Submission.md) to generate the submission tree and upload your results.


### Run individual scenarios for testing and optimization

TBD

### Questions? Suggestions?

Don't hesitate to get in touch via [public Discord server](https://discord.gg/JjWNWXKxwT).
Check the [MLCommons Task Force on Automation and Reproducibility](../../../taskforce.md)
and get in touch via [public Discord server](https://discord.gg/JjWNWXKxwT).

### Acknowledgments

15 changes: 7 additions & 8 deletions docs/mlperf/inference/3d-unet/README_reference.md
@@ -1,8 +1,11 @@
[ [Back to the common setup](README.md) ]

[ [Back to index](README.md) ]

## Run this benchmark via CM

*Note: from Feb 2024, we suggest you use [this GUI](https://access.cknowledge.org/playground/?action=howtorun&bench_uid=39877bb63fb54725)
to configure the MLPerf inference benchmark, generate CM commands to run it across different implementations, models, data sets, software
and hardware, and prepare your submissions.*


### Do a test run to detect and record the system performance

@@ -51,11 +54,7 @@ cmr "generate inference submission" --results_dir=$HOME/inference_3.1_results/va
--hw_notes_extra="Result taken by NAME" --quiet
```


### Run individual scenarios for testing and optimization

TBD

### Questions? Suggestions?

Don't hesitate to get in touch via [public Discord server](https://discord.gg/JjWNWXKxwT).
Check the [MLCommons Task Force on Automation and Reproducibility](../../../taskforce.md)
and get in touch via [public Discord server](https://discord.gg/JjWNWXKxwT).