Merge from CTuning (#1049)

mlcommons · Jan 8, 2024 · 2d0adb0 · 2d0adb0
2 parents b51bf18 + af871a1
commit 2d0adb0
Show file tree

Hide file tree

Showing 12 changed files with 216 additions and 31 deletions.
diff --git a/.gitignore b/.gitignore
@@ -16,5 +16,5 @@ wheels/
 htmlcov
 *tmp/
 *tmp-ck-*/
-cache/
+local/cache/
 
diff --git a/cm-mlops/automation/script/module.py b/cm-mlops/automation/script/module.py
@@ -2462,8 +2462,8 @@ def _run_deps(self, deps, clean_env_keys_deps, env, state, const, const_state, a
                 update_tags_from_env_with_prefix = d.get("update_tags_from_env_with_prefix", {})
                 for t in update_tags_from_env_with_prefix:
                     for key in update_tags_from_env_with_prefix[t]:
-                        if env.get(key, '').strip() != '':
-                            d['tags']+=","+t+env[key]
+                        if str(env.get(key, '')).strip() != '':
+                            d['tags']+=","+t+str(env[key])
 
                 for key in clean_env_keys_deps:
                     if '?' in key or '*' in key:

diff --git a/cm-mlops/script/calibrate-model-for.qaic/customize.py b/cm-mlops/script/calibrate-model-for.qaic/customize.py
@@ -24,7 +24,7 @@ def preprocess(i):
     if r['return'] > 0:
         return r
     cmd = r['cmd']
-    
+
     print("Profiling from "+ os.getcwd())
 
     env['CM_RUN_CMD'] = cmd
@@ -83,8 +83,10 @@ def postprocess(i):
     profile_file_path = os.path.join(os.getcwd(), "profile.yaml")
     env['CM_QAIC_MODEL_PROFILE_WITH_PATH'] = profile_file_path
 
-    input_layer_name = env.get('CM_ML_MODEL_INPUT_LAYER_NAME', 'images:0')
-
+    if env.get('CM_ML_MODEL_INPUT_LAYER_NAME', '') != '':
+        input_layer_names = [ env.get('CM_ML_MODEL_INPUT_LAYER_NAME') ]
+    else:
+        input_layer_names = [ "images:0", "images/:0" ]
 
     output_layer_names_conf = [ [], [] ]
     output_layer_names_loc = [ [], [] ]
@@ -101,7 +103,7 @@ def postprocess(i):
         "/TopK/:0",
         "/TopK_1/:0",
         "/TopK_2/:0",
-        "/TopK_3/:0"
+        "/TopK_3/:0",
         "/TopK_4/:0"
         ]
 
@@ -133,22 +135,18 @@ def postprocess(i):
                     if type(doc) == list:
 
                         node_names = [ k['NodeOutputName'] for k in doc]
-                        #print(node_names)
                         oindex = None
 
                         for output in output_layer_names_loc:
                             if output[0] in node_names:
-                                print(output[0])
                                 oindex = output_layer_names_loc.index(output)
-                                print(oindex)
                                 break
 
                         if oindex is None:
                             return {'return': 1, 'error': 'Output node names not found for the given retinanet model'}
 
                         for k in doc:
-                            #print(k['NodeOutputName'])
-                            if k["NodeOutputName"] == input_layer_name:
+                            if k["NodeOutputName"] in input_layer_names:
                                 min_val = k['Min']
                                 max_val = k['Max']
                                 scale, offset = get_scale_offset(min_val, max_val)
@@ -164,8 +162,8 @@ def postprocess(i):
                                     output_max_val_loc = max_val
                                 loc_scale, loc_offset = get_scale_offset(min_val, max_val)
                                 index = output_layer_names_loc[oindex].index(k["NodeOutputName"])
-                                env[f'CM_QAIC_MODEL_RETINANET_LOC_SCALE_{index}'] = loc_scale
-                                env[f'CM_QAIC_MODEL_RETINANET_LOC_OFFSET_{index}'] = loc_offset - 128 # to uint8 is done in NMS code
+                                env[f'CM_QAIC_MODEL_RETINANET_LOC_SCALE{index}'] = loc_scale
+                                env[f'CM_QAIC_MODEL_RETINANET_LOC_OFFSET{index}'] = loc_offset - 128 # to uint8 is done in NMS code
 
                                 total_range = max_val - min_val
                                 scale = total_range/256.0
@@ -180,8 +178,8 @@ def postprocess(i):
                                     output_max_val_conf = max_val
                                 conf_scale, conf_offset = get_scale_offset(min_val, max_val)
                                 index = output_layer_names_conf[oindex].index(k["NodeOutputName"])
-                                env[f'CM_QAIC_MODEL_RETINANET_CONF_SCALE_{index}'] = conf_scale
-                                env[f'CM_QAIC_MODEL_RETINANET_CONF_OFFSET_{index}'] = conf_offset - 128 # to uint8 is done in NMS code
+                                env[f'CM_QAIC_MODEL_RETINANET_CONF_SCALE{index}'] = conf_scale
+                                env[f'CM_QAIC_MODEL_RETINANET_CONF_OFFSET{index}'] = conf_offset - 128 # to uint8 is done in NMS code
                                 total_range = max_val - min_val
                                 scale = total_range/256.0
                                 offset = round(-min_val / scale)
@@ -196,8 +194,7 @@ def postprocess(i):
             except yaml.YAMLError as exc:
                 return {'return': 1, 'error': exc}
 
-    print(env)
-    return {'return':1}
+    return {'return':0}
 
 def get_scale_offset(min_val, max_val):
     total_range = max_val - min_val

diff --git a/cm-mlops/script/generate-mlperf-inference-user-conf/customize.py b/cm-mlops/script/generate-mlperf-inference-user-conf/customize.py
@@ -16,7 +16,7 @@ def preprocess(i):
 
     rerun = True if env.get("CM_RERUN","")!='' else False
 
-    env['CM_MLPERF_SKIP_RUN'] = "no"
+    env['CM_MLPERF_SKIP_RUN'] = env.get('CM_MLPERF_SKIP_RUN', "no")
 
     mlperf_path = env['CM_MLPERF_INFERENCE_SOURCE']
     submission_checker_dir = os.path.join(mlperf_path, "tools", "submission")

diff --git a/cm-mlops/script/get-preprocessed-dataset-generic/src/generic_preprocess.py b/cm-mlops/script/get-preprocessed-dataset-generic/src/generic_preprocess.py
@@ -172,7 +172,7 @@ def preprocess():
     interpolation_method    = os.getenv('CM_DATASET_INTERPOLATION_METHOD', '')
 
     print(("From: {}, To: {}, Size: {}, Crop: {}, InterSize: {}, 2BGR: {}, OFF: {}, VOL: '{}', FOF: {},"+
-        " DTYPE: {}, DLAYOUT: {}, EXT: {}, NORM: {}, SMEAN: {}, GCM: {}, QUANTIZE: {}, QUANT_SCALE: {}, QUANT_OFFSET: {}, CONV_UNSIGNED: {}, INTER: {}").format(
+        " DTYPE: {}, DLAYOUT: {}, EXT: {}, NORM: {}, SMEAN: {}, GCM: {}, GSTD: {}, QUANTIZE: {}, QUANT_SCALE: {}, QUANT_OFFSET: {}, CONV_UNSIGNED: {}, INTER: {}").format(
         source_dir, destination_dir, square_side, crop_percentage, inter_size, convert_to_bgr, offset, volume, fof_name,
         data_type, data_layout, new_file_extension, normalize_data, subtract_mean, given_channel_means, given_channel_stds, quantize, quant_scale, quant_offset, convert_to_unsigned, interpolation_method) )
 

diff --git a/cm-mlops/script/get-preprocessed-dataset-generic/src/preprocess_object_detection_dataset.py b/cm-mlops/script/get-preprocessed-dataset-generic/src/preprocess_object_detection_dataset.py
@@ -0,0 +1,167 @@
+#!/usr/bin/env python3
+
+import os
+import json
+import numpy as np
+from PIL import Image
+import torch
+import torchvision
+
+# File-name suffixes accepted when preprocess() lists the source directory;
+# the comparison there is done on the lower-cased file name.
+SUPPORTED_EXTENSIONS = ['jpeg', 'jpg', 'gif', 'png']
+
+def _channel_tensor(values):
+    """Turn per-channel statistics into a tensor that torch can ingest."""
+    # Reversed NumPy views (e.g. arr[::-1]) have negative strides, which
+    # torch.as_tensor rejects; make them contiguous first.
+    if isinstance(values, np.ndarray):
+        values = np.ascontiguousarray(values)
+    return torch.as_tensor(values)
+
+def load_image(image_path, target_size, data_type='uint8', convert_to_bgr=False,
+               normalize_data=False, normalize_lower=-1, normalize_upper=1,
+               subtract_mean=False, given_channel_means='', given_channel_stds='',
+               quantize=0, quant_scale=1, quant_offset=0, convert_to_unsigned=0):
+    """Load one image, normalize it per channel, resize it to a square and optionally quantize it.
+
+    Args:
+        image_path: Path of the image file to open.
+        target_size: Side length of the square output image.
+        convert_to_bgr: When truthy, the channel order is reversed after decoding.
+        given_channel_means: Per-channel values subtracted from the image;
+            skipped when empty.
+        given_channel_stds: Per-channel values the image is divided by;
+            skipped when empty.
+        quantize: When equal to 1, the result is passed through quantize_to_uint8.
+        quant_scale, quant_offset: Forwarded to quantize_to_uint8.
+        data_type, normalize_data, normalize_lower, normalize_upper,
+        subtract_mean, convert_to_unsigned: Accepted for signature
+            compatibility; not read by this function.
+
+    Returns:
+        A tuple (batch_data, original_width, original_height) where the
+        width and height are those of the source image before resizing.
+    """
+    # Pillow has no 'BGR' conversion mode, so always decode as RGB and swap
+    # the channel axis afterwards when BGR order is requested.
+    image = Image.open(image_path).convert('RGB')
+    original_width, original_height = image.size
+
+    tensor_image = torchvision.transforms.functional.to_tensor(image)
+    if convert_to_bgr:
+        tensor_image = tensor_image.flip(0)
+
+    # The defaults are empty strings, which cannot be converted to tensors;
+    # treat "nothing given" as "skip this normalization step".
+    normalized_image = tensor_image
+    if given_channel_means is not None and len(given_channel_means) > 0:
+        mean = _channel_tensor(given_channel_means)
+        normalized_image = normalized_image - mean[:, None, None]
+    if given_channel_stds is not None and len(given_channel_stds) > 0:
+        std = _channel_tensor(given_channel_stds)
+        normalized_image = normalized_image / std[:, None, None]
+
+    resized_image = torch.nn.functional.interpolate(normalized_image[None],
+                                                    size=(target_size, target_size),
+                                                    mode='bilinear')[0].numpy()
+
+    if quantize == 1:
+        resized_image = quantize_to_uint8(resized_image, quant_scale, quant_offset)
+
+    # NOTE(review): reshape only relabels the dimensions; the element order in
+    # memory (and therefore in any file written from it) is left unchanged.
+    batch_shape = (1, target_size, target_size, 3)
+    batch_data = resized_image.reshape(batch_shape)
+
+    return batch_data, original_width, original_height
+
+def quantize_to_uint8(image, scale, offset):
+    """Quantize an array to uint8 as round(image / scale + offset), clipped to [0, 255].
+
+    Args:
+        image: NumPy array of values to quantize.
+        scale: Divisor applied to every element.
+        offset: Value added after scaling.
+
+    Returns:
+        A NumPy array of dtype uint8 with the same shape as ``image``.
+    """
+    quantized_image = image.astype(np.float64) / scale + offset
+    # np.round_ was removed in NumPy 2.0; np.round is the supported spelling.
+    output = np.clip(np.round(quantized_image), 0, 255)
+    return output.astype(np.uint8)
+
+def preprocess_files(selected_filenames, source_dir, destination_dir, square_side,
+                     data_type, convert_to_bgr, normalize_data, normalize_lower,
+                     normalize_upper, subtract_mean, given_channel_means,
+                     given_channel_stds, quantize, quant_scale, quant_offset,
+                     convert_to_unsigned, new_file_extension):
+    """Preprocess every listed image and write the raw result into destination_dir.
+
+    Each file is loaded through load_image with the given options, dumped
+    with ndarray.tofile, and reported on stdout.
+
+    Returns:
+        A list of "<output file name>;<width>;<height>" strings, one per
+        input file, using the dimensions reported by load_image.
+    """
+    # Options that are identical for every image are collected once.
+    shared_options = dict(
+        target_size=square_side,
+        data_type=data_type,
+        convert_to_bgr=convert_to_bgr,
+        normalize_data=normalize_data,
+        normalize_lower=normalize_lower,
+        normalize_upper=normalize_upper,
+        subtract_mean=subtract_mean,
+        given_channel_means=given_channel_means,
+        given_channel_stds=given_channel_stds,
+        quantize=quantize,
+        quant_scale=quant_scale,
+        quant_offset=quant_offset,
+        convert_to_unsigned=convert_to_unsigned,
+    )
+
+    signatures = []
+    for position, name in enumerate(selected_filenames, start=1):
+        batch, width, height = load_image(
+            image_path=os.path.join(source_dir, name), **shared_options)
+
+        # Swap the extension only when a replacement one was requested.
+        if new_file_extension:
+            stem = name.rsplit('.', 1)[0]
+            stored_name = f"{stem}.{new_file_extension}"
+        else:
+            stored_name = name
+
+        target_path = os.path.join(destination_dir, stored_name)
+        batch.tofile(target_path)
+
+        print(f"[{position}]:  Stored {target_path}")
+        signatures.append(f'{stored_name};{width};{height}')
+
+    return signatures
+
+def preprocess():
+    """Preprocess a dataset according to CM_* environment variables.
+
+    Reads its whole configuration from the environment, selects the input
+    files (a single CM_IMAGE_FILE, the images listed in the annotations
+    file, or the supported images found in CM_DATASET_PATH), runs
+    preprocess_files on them and writes one signature line per file to
+    the CM_DATASET_SUBSET_FOF file inside the destination directory.
+
+    Raises:
+        KeyError: If a required environment variable is missing.
+        FileNotFoundError: If no file list can be built because the source
+            directory does not exist.
+    """
+    # Imported locally because the module does not import errno at the top.
+    import errno
+
+    source_directory = os.environ['CM_DATASET_PATH']
+    destination_directory = os.environ['CM_DATASET_PREPROCESSED_PATH']
+
+    intermediate_data_type = os.environ.get('CM_DATASET_INTERMEDIATE_DATA_TYPE', np.float32)
+    square_side = int(os.environ['CM_DATASET_INPUT_SQUARE_SIDE'])
+    crop_percentage = float(os.environ['CM_DATASET_CROP_FACTOR'])
+    inter_size = int(os.getenv('CM_DATASET_INTERMEDIATE_SIZE', 0))
+    convert_to_bgr = int(os.getenv('CM_DATASET_CONVERT_TO_BGR', 0))
+    offset = int(os.getenv('CM_DATASET_SUBSET_OFFSET', 0))
+    volume = int(os.environ['CM_DATASET_SIZE'])
+    fof_name = os.getenv('CM_DATASET_SUBSET_FOF', 'files.txt')
+    data_type = os.getenv('CM_DATASET_DATA_TYPE_INPUT', 'float32')
+    data_layout = os.getenv('CM_DATASET_DATA_LAYOUT', '').lower()
+    new_file_extension = os.getenv('CM_DATASET_PREPROCESSED_EXTENSION', '')
+    normalize_data = int(os.getenv('CM_DATASET_NORMALIZE_DATA', '0'))
+    subtract_mean = int(os.getenv('CM_DATASET_SUBTRACT_MEANS', '0'))
+    given_channel_means = os.getenv('CM_DATASET_GIVEN_CHANNEL_MEANS', '')
+    given_channel_stds = os.getenv('CM_DATASET_GIVEN_CHANNEL_STDS', '')
+    quant_scale = float(os.environ['CM_DATASET_QUANT_SCALE'])
+    quant_offset = float(os.environ['CM_DATASET_QUANT_OFFSET'])
+    quantize = int(os.environ['CM_DATASET_QUANTIZE'])  # 1 for quantize to int8
+    convert_to_unsigned = int(os.environ['CM_DATASET_CONVERT_TO_UNSIGNED'])  # 1 for int8 to uint8
+
+    interpolation_method = os.getenv('CM_DATASET_INTERPOLATION_METHOD', '')
+
+    annotations_filepath = os.environ['CM_DATASET_ANNOTATIONS_FILE_PATH']
+    is_calibration = os.environ['CM_DATASET_TYPE'] == "calibration"
+    image_file = os.getenv('CM_IMAGE_FILE', '')
+
+    normalize_lower = float(os.getenv('CM_DATASET_NORMALIZE_LOWER', -1.0))
+    normalize_upper = float(os.getenv('CM_DATASET_NORMALIZE_UPPER', 1.0))
+
+    # Space-separated channel statistics become arrays. When BGR order is
+    # requested they are reversed into a contiguous copy, because a plain
+    # [::-1] view has negative strides that torch.as_tensor cannot consume.
+    if given_channel_means:
+        given_channel_means = np.fromstring(given_channel_means, dtype=np.float32, sep=' ').astype(intermediate_data_type)
+        if convert_to_bgr:
+            given_channel_means = np.ascontiguousarray(given_channel_means[::-1])
+
+    if given_channel_stds:
+        given_channel_stds = np.fromstring(given_channel_stds, dtype=np.float32, sep=' ').astype(intermediate_data_type)
+        if convert_to_bgr:
+            given_channel_stds = np.ascontiguousarray(given_channel_stds[::-1])
+
+    print(f"From: {source_directory}, To: {destination_directory}, Size: {square_side}, Crop: {crop_percentage}, InterSize: {inter_size}, 2BGR: {convert_to_bgr}, " +
+      f"OFF: {offset}, VOL: '{volume}', FOF: {fof_name}, DTYPE: {data_type}, DLAYOUT: {data_layout}, EXT: {new_file_extension}, " +
+      f"NORM: {normalize_data}, SMEAN: {subtract_mean}, GCM: {given_channel_means}, GSTD: {given_channel_stds}, QUANTIZE: {quantize}, QUANT_SCALE: {quant_scale}, " +
+      f"QUANT_OFFSET: {quant_offset}, CONV_UNSIGNED: {convert_to_unsigned}, INTER: {interpolation_method}")
+
+
+    if image_file:
+        # A single explicit image overrides the dataset directory.
+        source_directory = os.path.dirname(image_file)
+        selected_filenames = [os.path.basename(image_file)]
+    else:
+        if annotations_filepath and not is_calibration:
+            with open(annotations_filepath, "r") as annotations_fh:
+                annotations_struct = json.load(annotations_fh)
+            ordered_filenames = [image_entry['file_name'] for image_entry in annotations_struct['images']]
+        elif os.path.isdir(source_directory):
+            ordered_filenames = [filename for filename in sorted(os.listdir(source_directory)) if any(filename.lower().endswith(extension) for extension in SUPPORTED_EXTENSIONS)]
+        else:
+            raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), source_directory)
+
+        total_volume = len(ordered_filenames)
+
+        # A negative offset counts from the end of the file list.
+        if offset < 0:
+            offset += total_volume
+
+        # A volume of 0 means "everything from the offset onwards".
+        if not volume:
+            volume = total_volume - offset
+
+        selected_filenames = ordered_filenames[offset:offset + volume]
+
+    output_signatures = preprocess_files(selected_filenames, source_directory, destination_directory, square_side, data_type,
+                                         convert_to_bgr, normalize_data, normalize_lower, normalize_upper,
+                                         subtract_mean, given_channel_means, given_channel_stds, quantize,
+                                         quant_scale, quant_offset, convert_to_unsigned, new_file_extension)
+
+    fof_full_path = os.path.join(destination_directory, fof_name)
+    with open(fof_full_path, 'w') as fof_file:
+        for filename in output_signatures:
+            fof_file.write(f'{filename}\n')
+
+# Run the preprocessing only when executed as a script, not when imported.
+if __name__ == "__main__":
+    preprocess()
+
diff --git a/cm-mlops/script/get-preprocessed-dataset-openimages/_cm.json b/cm-mlops/script/get-preprocessed-dataset-openimages/_cm.json
@@ -246,6 +246,16 @@
         "CM_DATASET_QUANT_OFFSET": "114"
       }
     },
+    "quant-scale.#": {
+      "const": {
+        "CM_DATASET_QUANT_SCALE": "#"
+      }
+    },
+    "quant-offset.#": {
+      "const": {
+        "CM_DATASET_QUANT_OFFSET": "#"
+      }
+    },
     "inter.linear": {
       "group": "interpolation-method",
       "env": {

diff --git a/cm-mlops/script/get-preprocessed-dataset-openimages/customize.py b/cm-mlops/script/get-preprocessed-dataset-openimages/customize.py
@@ -13,7 +13,7 @@ def preprocess(i):
     if env.get('CM_DATASET_REFERENCE_PREPROCESSOR',"0") == "1":
         print("Using MLCommons Inference source from '" + env['CM_MLPERF_INFERENCE_SOURCE'] +"'")
 
-    if env.get('CM_MODEL_NAME', '') == 'retinanet':
+    if env.get('CM_ML_MODEL_NAME', '') == 'retinanet':
         if env.get('CM_DATASET_QUANTIZE', '') == '1':
             if env.get('CM_QAIC_MODEL_RETINANET_IMAGE_SCALE', '') != '':
                 env['CM_DATASET_QUANT_SCALE'] = env['CM_QAIC_MODEL_RETINANET_IMAGE_SCALE']

diff --git a/cm-mlops/script/get-preprocessed-dataset-openimages/preprocess.py b/cm-mlops/script/get-preprocessed-dataset-openimages/preprocess.py
@@ -16,7 +16,7 @@
 if os.environ.get('CM_DATASET_REFERENCE_PREPROCESSOR', '1') == "0":
     #import generic_preprocess
     #generic_preprocess.preprocess()
-    import preprocess_image_dataset as pp
+    import preprocess_object_detection_dataset as pp
     pp.preprocess()
 else:
     dataset_list = os.environ.get('CM_DATASET_ANNOTATIONS_FILE_PATH', None)

diff --git a/cm-mlops/script/reproduce-mlperf-inference-qualcomm/README_aws_dl2q.24xlarge.md b/cm-mlops/script/reproduce-mlperf-inference-qualcomm/README_aws_dl2q.24xlarge.md
@@ -122,5 +122,5 @@ cm run script --tags=generate-run-cmds,inference,_accuracy-only --device=qaic --
 --adr.mlperf-inference-implementation.tags=_bs.1,_dl2q.24xlarge --execution-mode=valid --quiet
 ```
 
-The expected accuracy is 37.xx
+The expected accuracy is 37.234
 
diff --git a/cm-mlops/script/reproduce-mlperf-inference-qualcomm/_cm.yaml b/cm-mlops/script/reproduce-mlperf-inference-qualcomm/_cm.yaml
@@ -213,6 +213,11 @@ deps:
       - openimages-preprocessed
       - dataset-preprocessed
     tags: get,dataset,preprocessed,openimages,_for.retinanet.onnx,_NCHW,_validation,_custom-annotations
+    # Each non-empty env value is appended to this dependency's tags behind its prefix.
+    update_tags_from_env_with_prefix:
+      _quant-scale.:
+        - CM_QAIC_MODEL_RETINANET_IMAGE_SCALE
+      _quant-offset.:
+        - CM_QAIC_MODEL_RETINANET_IMAGE_OFFSET
     skip_if_env:
       CM_MLPERF_SKIP_RUN:
         - yes
@@ -284,6 +289,9 @@ variations:
           CM_MLPERF_SKIP_RUN:
             - yes
       - tags: get,lib,protobuf,_tag.v3.11.4
+        skip_if_env:
+          CM_MLPERF_SKIP_RUN:
+            - yes
       - tags: set,device,mode,qaic
         enable_if_env:
           CM_QAIC_VC:

diff --git a/cm-mlops/script/reproduce-mlperf-inference-qualcomm/customize.py b/cm-mlops/script/reproduce-mlperf-inference-qualcomm/customize.py
@@ -67,16 +67,19 @@ def preprocess(i):
             env['+ CXXFLAGS'].append("-DMODEL_RX50")
 
         keys = [ 'LOC_OFFSET', 'LOC_SCALE', 'CONF_OFFSET', 'CONF_SCALE' ]
-        for i in range(0,4):
-            keys.append(f'LOC_OFFSET_{i}')
-            keys.append(f'LOC_SCALE_{i}')
-            keys.append(f'CONF_OFFSET_{i}')
-            keys.append(f'CONF_SCALE_{i}')
+
+        if env.get('CM_RETINANET_USE_MULTIPLE_SCALES_OFFSETS', '') == 'yes':
+            env['+ CXXFLAGS'].append("-DUSE_MULTIPLE_SCALES_OFFSETS=1")
+            for j in range(0,4):
+                keys.append(f'LOC_OFFSET{j}')
+                keys.append(f'LOC_SCALE{j}')
+                keys.append(f'CONF_OFFSET{j}')
+                keys.append(f'CONF_SCALE{j}')
 
         for key in keys:
-            value = env.get('CM_QAIC_MODEL_RETINANET_'+key)
-            if value:
-                env['+ CXXFLAGS'].append(f" -D{key}={value} ")
+            value = env.get('CM_QAIC_MODEL_RETINANET_'+key, '')
+            if value != '':
+                env['+ CXXFLAGS'].append(f" -D{key}_={value} ")
 
     if env.get('CM_BENCHMARK', '') == 'NETWORK_BERT_SERVER':
         source_files.append(os.path.join(kilt_root, "benchmarks", "network", "bert", "server", "pack.cpp"))
-Original file line number
+Diff line change
@@ Expand Up / @@ -16,5 +16,5 @@ wheels/ @@
     htmlcov
     *tmp/
     *tmp-ck-*/
-    cache/
+    local/cache/