diff --git a/.gitignore b/.gitignore
index 7045c742f7..96523fae44 100644
--- a/.gitignore
+++ b/.gitignore
@@ -16,5 +16,5 @@
 wheels/
 htmlcov
 *tmp/
 *tmp-ck-*/
-cache/
+local/cache/
diff --git a/cm-mlops/automation/script/module.py b/cm-mlops/automation/script/module.py
index 8080000c12..72f167a94f 100644
--- a/cm-mlops/automation/script/module.py
+++ b/cm-mlops/automation/script/module.py
@@ -2462,8 +2462,8 @@ def _run_deps(self, deps, clean_env_keys_deps, env, state, const, const_state, a
             update_tags_from_env_with_prefix = d.get("update_tags_from_env_with_prefix", {})
             for t in update_tags_from_env_with_prefix:
                 for key in update_tags_from_env_with_prefix[t]:
-                    if env.get(key, '').strip() != '':
-                        d['tags']+=","+t+env[key]
+                    if str(env.get(key, '')).strip() != '':
+                        d['tags']+=","+t+str(env[key])
 
             for key in clean_env_keys_deps:
                 if '?' in key or '*' in key:
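Review note on the module.py hunk above: wrapping the env lookup in `str()` matters because scripts can export non-string values (e.g., a float scale), and the old `env.get(key, '').strip()` would raise `AttributeError` on them. A minimal sketch of the behaviour, using a hypothetical dependency dict `d` (illustrative only, not the actual CM automation API):

```python
env = {'CM_QAIC_MODEL_RETINANET_IMAGE_SCALE': 0.0186}   # a float, not a str
d = {'tags': 'get,dataset,preprocessed,openimages',
     'update_tags_from_env_with_prefix': {
         '_quant-scale.': ['CM_QAIC_MODEL_RETINANET_IMAGE_SCALE']}}

for t, keys in d['update_tags_from_env_with_prefix'].items():
    for key in keys:
        # old code: env.get(key, '').strip() -> AttributeError for float values
        if str(env.get(key, '')).strip() != '':
            d['tags'] += "," + t + str(env[key])

print(d['tags'])   # ...,_quant-scale.0.0186
```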
diff --git a/cm-mlops/script/calibrate-model-for.qaic/customize.py b/cm-mlops/script/calibrate-model-for.qaic/customize.py
index 60aa0352b0..62c4dbdbae 100644
--- a/cm-mlops/script/calibrate-model-for.qaic/customize.py
+++ b/cm-mlops/script/calibrate-model-for.qaic/customize.py
@@ -24,7 +24,7 @@ def preprocess(i):
         if r['return'] > 0: return r
 
         cmd = r['cmd']
-
+        print("Profiling from "+ os.getcwd())
 
     env['CM_RUN_CMD'] = cmd
 
@@ -83,8 +83,10 @@ def postprocess(i):
     profile_file_path = os.path.join(os.getcwd(), "profile.yaml")
     env['CM_QAIC_MODEL_PROFILE_WITH_PATH'] = profile_file_path
 
-    input_layer_name = env.get('CM_ML_MODEL_INPUT_LAYER_NAME', 'images:0')
-
+    if env.get('CM_ML_MODEL_INPUT_LAYER_NAME', '') != '':
+        input_layer_names = [ env.get('CM_ML_MODEL_INPUT_LAYER_NAME') ]
+    else:
+        input_layer_names = [ "images:0", "images/:0" ]
 
     output_layer_names_conf = [ [], [] ]
     output_layer_names_loc = [ [], [] ]
@@ -101,7 +103,7 @@ def postprocess(i):
         "/TopK/:0",
         "/TopK_1/:0",
         "/TopK_2/:0",
-        "/TopK_3/:0"
+        "/TopK_3/:0", "/TopK_4/:0"
     ]
@@ -133,22 +135,18 @@ def postprocess(i):
         if type(doc) == list:
             node_names = [ k['NodeOutputName'] for k in doc]
-            #print(node_names)
 
             oindex = None
 
             for output in output_layer_names_loc:
                 if output[0] in node_names:
-                    print(output[0])
                     oindex = output_layer_names_loc.index(output)
-                    print(oindex)
                     break
 
             if oindex is None:
                 return {'return': 1, 'error': 'Output node names not found for the given retinanet model'}
 
             for k in doc:
-                #print(k['NodeOutputName'])
-                if k["NodeOutputName"] == input_layer_name:
+                if k["NodeOutputName"] in input_layer_names:
                     min_val = k['Min']
                     max_val = k['Max']
                     scale, offset = get_scale_offset(min_val, max_val)
@@ -164,8 +162,8 @@ def postprocess(i):
                     output_max_val_loc = max_val
                     loc_scale, loc_offset = get_scale_offset(min_val, max_val)
                     index = output_layer_names_loc[oindex].index(k["NodeOutputName"])
-                    env[f'CM_QAIC_MODEL_RETINANET_LOC_SCALE_{index}'] = loc_scale
-                    env[f'CM_QAIC_MODEL_RETINANET_LOC_OFFSET_{index}'] = loc_offset - 128 # to uint8 is done in NMS code
+                    env[f'CM_QAIC_MODEL_RETINANET_LOC_SCALE{index}'] = loc_scale
+                    env[f'CM_QAIC_MODEL_RETINANET_LOC_OFFSET{index}'] = loc_offset - 128 # to uint8 is done in NMS code
 
                     total_range = max_val - min_val
                     scale = total_range/256.0
@@ -180,8 +178,8 @@ def postprocess(i):
                     output_max_val_conf = max_val
                     conf_scale, conf_offset = get_scale_offset(min_val, max_val)
                     index = output_layer_names_conf[oindex].index(k["NodeOutputName"])
-                    env[f'CM_QAIC_MODEL_RETINANET_CONF_SCALE_{index}'] = conf_scale
-                    env[f'CM_QAIC_MODEL_RETINANET_CONF_OFFSET_{index}'] = conf_offset - 128 # to uint8 is done in NMS code
+                    env[f'CM_QAIC_MODEL_RETINANET_CONF_SCALE{index}'] = conf_scale
+                    env[f'CM_QAIC_MODEL_RETINANET_CONF_OFFSET{index}'] = conf_offset - 128 # to uint8 is done in NMS code
 
                     total_range = max_val - min_val
                     scale = total_range/256.0
                     offset = round(-min_val / scale)
@@ -196,8 +194,7 @@ def postprocess(i):
         except yaml.YAMLError as exc:
             return {'return': 1, 'error': exc}
 
-    print(env)
-    return {'return':1}
+    return {'return':0}
 
 def get_scale_offset(min_val, max_val):
     total_range = max_val - min_val
diff --git a/cm-mlops/script/generate-mlperf-inference-user-conf/customize.py b/cm-mlops/script/generate-mlperf-inference-user-conf/customize.py
index 3d7494a29f..2bc0a77cd0 100644
--- a/cm-mlops/script/generate-mlperf-inference-user-conf/customize.py
+++ b/cm-mlops/script/generate-mlperf-inference-user-conf/customize.py
@@ -16,7 +16,7 @@ def preprocess(i):
 
     rerun = True if env.get("CM_RERUN","")!='' else False
 
-    env['CM_MLPERF_SKIP_RUN'] = "no"
+    env['CM_MLPERF_SKIP_RUN'] = env.get('CM_MLPERF_SKIP_RUN', "no")
 
     mlperf_path = env['CM_MLPERF_INFERENCE_SOURCE']
     submission_checker_dir = os.path.join(mlperf_path, "tools", "submission")
diff --git a/cm-mlops/script/get-preprocessed-dataset-generic/src/generic_preprocess.py b/cm-mlops/script/get-preprocessed-dataset-generic/src/generic_preprocess.py
index c21e71c2e5..752895db88 100644
--- a/cm-mlops/script/get-preprocessed-dataset-generic/src/generic_preprocess.py
+++ b/cm-mlops/script/get-preprocessed-dataset-generic/src/generic_preprocess.py
@@ -172,7 +172,7 @@ def preprocess():
     interpolation_method = os.getenv('CM_DATASET_INTERPOLATION_METHOD', '')
 
     print(("From: {}, To: {}, Size: {}, Crop: {}, InterSize: {}, 2BGR: {}, OFF: {}, VOL: '{}', FOF: {},"+
-        " DTYPE: {}, DLAYOUT: {}, EXT: {}, NORM: {}, SMEAN: {}, GCM: {}, QUANTIZE: {}, QUANT_SCALE: {}, QUANT_OFFSET: {}, CONV_UNSIGNED: {}, INTER: {}").format(
+        " DTYPE: {}, DLAYOUT: {}, EXT: {}, NORM: {}, SMEAN: {}, GCM: {}, GSTD: {}, QUANTIZE: {}, QUANT_SCALE: {}, QUANT_OFFSET: {}, CONV_UNSIGNED: {}, INTER: {}").format(
         source_dir, destination_dir, square_side, crop_percentage, inter_size, convert_to_bgr, offset, volume, fof_name,
         data_type, data_layout, new_file_extension, normalize_data, subtract_mean, given_channel_means, given_channel_stds,
         quantize, quant_scale, quant_offset, convert_to_unsigned, interpolation_method) )
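Review note: the calibration postprocess now writes per-output env keys without an underscore before the index (`CM_QAIC_MODEL_RETINANET_LOC_SCALE0`, ...), matching the flag-generation change in reproduce-mlperf-inference-qualcomm/customize.py further down. For reference, a worked example of the scale/offset arithmetic in these hunks; the helper body follows the `total_range/256` scheme visible in the hunk context, and the sample range is made up:

```python
# Standalone version of the quantization-parameter math in postprocess().
def get_scale_offset(min_val, max_val):
    total_range = max_val - min_val
    scale = total_range / 256.0        # 256 quantization bins
    offset = round(-min_val / scale)   # zero point of the quantized range
    return scale, offset

# e.g. an activation whose profile.yaml range is [-3.2, 4.8]:
scale, offset = get_scale_offset(-3.2, 4.8)
print(scale, offset)    # 0.03125 102
print(offset - 128)     # -26 -> stored shifted, since uint8 conversion happens in the NMS code
```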
diff --git a/cm-mlops/script/get-preprocessed-dataset-generic/src/preprocess_object_detection_dataset.py b/cm-mlops/script/get-preprocessed-dataset-generic/src/preprocess_object_detection_dataset.py
new file mode 100644
index 0000000000..84e18ee397
--- /dev/null
+++ b/cm-mlops/script/get-preprocessed-dataset-generic/src/preprocess_object_detection_dataset.py
@@ -0,0 +1,167 @@
+#!/usr/bin/env python3
+
+import errno
+import os
+import json
+import numpy as np
+from PIL import Image
+import torch
+import torchvision
+
+SUPPORTED_EXTENSIONS = ['jpeg', 'jpg', 'gif', 'png']
+
+def load_image(image_path, target_size, data_type='uint8', convert_to_bgr=False,
+               normalize_data=False, normalize_lower=-1, normalize_upper=1,
+               subtract_mean=False, given_channel_means='', given_channel_stds='',
+               quantize=0, quant_scale=1, quant_offset=0, convert_to_unsigned=0):
+    if not convert_to_bgr:
+        image = Image.open(image_path).convert('RGB')
+    else:
+        image = Image.open(image_path).convert('BGR')
+
+    # Record the source dimensions before any resizing; the detection
+    # harness needs them to scale predicted boxes back to the original image.
+    original_width, original_height = image.size
+
+    tensor_image = torchvision.transforms.functional.to_tensor(image)
+    mean = torch.as_tensor(given_channel_means)
+    std = torch.as_tensor(given_channel_stds)
+    normalized_image = (tensor_image - mean[:, None, None]) / std[:, None, None]
+
+    resized_image = torch.nn.functional.interpolate(normalized_image[None],
+                                                    size=(target_size, target_size),
+                                                    mode='bilinear')[0].numpy()
+
+    if quantize == 1:
+        resized_image = quantize_to_uint8(resized_image, quant_scale, quant_offset)
+
+    batch_shape = (1, target_size, target_size, 3)
+    batch_data = resized_image.reshape(batch_shape)
+
+    return batch_data, original_width, original_height
+
+def quantize_to_uint8(image, scale, offset):
+    quantized_image = image.astype(np.float64) / scale + offset
+    output = np.round_(quantized_image)
+    output = np.clip(output, 0, 255)
+    return output.astype(np.uint8)
+
+def preprocess_files(selected_filenames, source_dir, destination_dir, square_side,
+                     data_type, convert_to_bgr, normalize_data, normalize_lower,
+                     normalize_upper, subtract_mean, given_channel_means,
+                     given_channel_stds, quantize, quant_scale, quant_offset,
+                     convert_to_unsigned, new_file_extension):
+    output_signatures = []
+
+    for current_idx, input_filename in enumerate(selected_filenames):
+        full_input_path = os.path.join(source_dir, input_filename)
+        image_data, original_width, original_height = load_image(
+            image_path=full_input_path,
+            target_size=square_side,
+            data_type=data_type,
+            convert_to_bgr=convert_to_bgr,
+            normalize_data=normalize_data,
+            normalize_lower=normalize_lower,
+            normalize_upper=normalize_upper,
+            subtract_mean=subtract_mean,
+            given_channel_means=given_channel_means,
+            given_channel_stds=given_channel_stds,
+            quantize=quantize,
+            quant_scale=quant_scale,
+            quant_offset=quant_offset,
+            convert_to_unsigned=convert_to_unsigned
+        )
+
+        output_filename = f"{input_filename.rsplit('.', 1)[0]}.{new_file_extension}" if new_file_extension else input_filename
+        full_output_path = os.path.join(destination_dir, output_filename)
+        image_data.tofile(full_output_path)
+
+        print(f"[{current_idx+1}]: Stored {full_output_path}")
+        output_signatures.append(f'{output_filename};{original_width};{original_height}')
+
+    return output_signatures
+
+def preprocess():
+    source_directory = os.environ['CM_DATASET_PATH']
+    destination_directory = os.environ['CM_DATASET_PREPROCESSED_PATH']
+
+    intermediate_data_type = os.environ.get('CM_DATASET_INTERMEDIATE_DATA_TYPE', np.float32)
+    square_side = int(os.environ['CM_DATASET_INPUT_SQUARE_SIDE'])
+    crop_percentage = float(os.environ['CM_DATASET_CROP_FACTOR'])
+    inter_size = int(os.getenv('CM_DATASET_INTERMEDIATE_SIZE', 0))
+    convert_to_bgr = int(os.getenv('CM_DATASET_CONVERT_TO_BGR', 0))
+    offset = int(os.getenv('CM_DATASET_SUBSET_OFFSET', 0))
+    volume = int(os.environ['CM_DATASET_SIZE'])
+    fof_name = os.getenv('CM_DATASET_SUBSET_FOF', 'files.txt')
+    data_type = os.getenv('CM_DATASET_DATA_TYPE_INPUT', 'float32')
+    input_data_type = os.getenv('CM_DATASET_DATA_TYPE_INPUT', 'float32')
+    data_layout = os.getenv('CM_DATASET_DATA_LAYOUT', '').lower()
+    new_file_extension = os.getenv('CM_DATASET_PREPROCESSED_EXTENSION', '')
+    normalize_data = int(os.getenv('CM_DATASET_NORMALIZE_DATA', '0'))
+    subtract_mean = int(os.getenv('CM_DATASET_SUBTRACT_MEANS', '0'))
+    given_channel_means = os.getenv('CM_DATASET_GIVEN_CHANNEL_MEANS', '')
+    given_channel_stds = os.getenv('CM_DATASET_GIVEN_CHANNEL_STDS', '')
+    quant_scale = float(os.environ['CM_DATASET_QUANT_SCALE'])
+    quant_offset = float(os.environ['CM_DATASET_QUANT_OFFSET'])
+    quantize = int(os.environ['CM_DATASET_QUANTIZE']) # 1 for quantize to int8
+    convert_to_unsigned = int(os.environ['CM_DATASET_CONVERT_TO_UNSIGNED']) # 1 for int8 to uint8
+
+    images_list = os.getenv('CM_DATASET_IMAGES_LIST')
+    interpolation_method = os.getenv('CM_DATASET_INTERPOLATION_METHOD', '')
+
+    annotations_filepath = os.environ['CM_DATASET_ANNOTATIONS_FILE_PATH']
+    is_calibration = os.environ['CM_DATASET_TYPE'] == "calibration"
+    image_file = os.getenv('CM_IMAGE_FILE', '')
+
+    normalize_lower = float(os.getenv('CM_DATASET_NORMALIZE_LOWER', -1.0))
+    normalize_upper = float(os.getenv('CM_DATASET_NORMALIZE_UPPER', 1.0))
+
+    if given_channel_means:
+        given_channel_means = np.fromstring(given_channel_means, dtype=np.float32, sep=' ').astype(intermediate_data_type)
+        if convert_to_bgr:
+            given_channel_means = given_channel_means[::-1]
+
+    if given_channel_stds:
+        given_channel_stds = np.fromstring(given_channel_stds, dtype=np.float32, sep=' ').astype(intermediate_data_type)
+        if convert_to_bgr:
+            given_channel_stds = given_channel_stds[::-1]
+
+    print(f"From: {source_directory}, To: {destination_directory}, Size: {square_side}, Crop: {crop_percentage}, InterSize: {inter_size}, 2BGR: {convert_to_bgr}, " +
+          f"OFF: {offset}, VOL: '{volume}', FOF: {fof_name}, DTYPE: {data_type}, DLAYOUT: {data_layout}, EXT: {new_file_extension}, " +
+          f"NORM: {normalize_data}, SMEAN: {subtract_mean}, GCM: {given_channel_means}, GSTD: {given_channel_stds}, QUANTIZE: {quantize}, QUANT_SCALE: {quant_scale}, " +
+          f"QUANT_OFFSET: {quant_offset}, CONV_UNSIGNED: {convert_to_unsigned}, INTER: {interpolation_method}")
+
+    if image_file:
+        source_directory = os.path.dirname(image_file)
+        selected_filenames = [os.path.basename(image_file)]
+    else:
+        if annotations_filepath and not is_calibration:
+            with open(annotations_filepath, "r") as annotations_fh:
+                annotations_struct = json.load(annotations_fh)
+            ordered_filenames = [image_entry['file_name'] for image_entry in annotations_struct['images']]
+        elif os.path.isdir(source_directory):
+            ordered_filenames = [filename for filename in sorted(os.listdir(source_directory)) if any(filename.lower().endswith(extension) for extension in SUPPORTED_EXTENSIONS)]
+        else:
+            raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), source_directory)
+
+        total_volume = len(ordered_filenames)
+
+        if offset < 0:
+            offset += total_volume
+
+        if not volume:
+            volume = total_volume - offset
+
+        selected_filenames = ordered_filenames[offset:offset + volume]
+
+    output_signatures = preprocess_files(selected_filenames, source_directory, destination_directory, square_side, data_type,
+                                         convert_to_bgr, normalize_data, normalize_lower, normalize_upper,
+                                         subtract_mean, given_channel_means, given_channel_stds, quantize,
+                                         quant_scale, quant_offset, convert_to_unsigned, new_file_extension)
+
+    fof_full_path = os.path.join(destination_directory, fof_name)
+    with open(fof_full_path, 'w') as fof_file:
+        for filename in output_signatures:
+            fof_file.write(f'{filename}\n')
+
+if __name__ == "__main__":
+    preprocess()
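Review note: the new preprocessor is driven entirely by `CM_DATASET_*` environment variables and exposes `preprocess()` as its single entry point, which get-preprocessed-dataset-openimages/preprocess.py imports further down. A minimal standalone invocation sketch; every path and value here is illustrative, not a recommended configuration:

```python
import os

os.environ.update({
    'CM_DATASET_PATH': '/data/openimages/validation',          # hypothetical
    'CM_DATASET_PREPROCESSED_PATH': '/data/openimages/prep',   # hypothetical
    'CM_DATASET_ANNOTATIONS_FILE_PATH': '/data/openimages/annotations/openimages-mlperf.json',
    'CM_DATASET_TYPE': 'validation',
    'CM_DATASET_INPUT_SQUARE_SIDE': '800',
    'CM_DATASET_CROP_FACTOR': '100.0',
    'CM_DATASET_SIZE': '8',
    'CM_DATASET_GIVEN_CHANNEL_MEANS': '0.485 0.456 0.406',
    'CM_DATASET_GIVEN_CHANNEL_STDS': '0.229 0.224 0.225',
    'CM_DATASET_QUANTIZE': '1',
    'CM_DATASET_QUANT_SCALE': '0.0186',
    'CM_DATASET_QUANT_OFFSET': '114',
    'CM_DATASET_CONVERT_TO_UNSIGNED': '0',
})

import preprocess_object_detection_dataset as pp
pp.preprocess()   # writes '<name>.<ext>;width;height' lines into files.txt
```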
diff --git a/cm-mlops/script/get-preprocessed-dataset-openimages/_cm.json b/cm-mlops/script/get-preprocessed-dataset-openimages/_cm.json
index 98110f927b..e15cd1fe52 100644
--- a/cm-mlops/script/get-preprocessed-dataset-openimages/_cm.json
+++ b/cm-mlops/script/get-preprocessed-dataset-openimages/_cm.json
@@ -246,6 +246,16 @@
         "CM_DATASET_QUANT_OFFSET": "114"
       }
     },
+    "quant-scale.#": {
+      "const": {
+        "CM_DATASET_QUANT_SCALE": "#"
+      }
+    },
+    "quant-offset.#": {
+      "const": {
+        "CM_DATASET_QUANT_OFFSET": "#"
+      }
+    },
     "inter.linear": {
       "group": "interpolation-method",
       "env": {
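Review note: the `#` in a variation name is CM's wildcard, so a runtime tag such as `_quant-scale.0.0186` selects `quant-scale.#` and substitutes the suffix for `#` in the `const` env. A hypothetical re-implementation of that substitution, for illustration only (the real resolution lives in cm-mlops/automation/script/module.py):

```python
def resolve_wildcard_variation(tag, pattern, const_env):
    """tag: '_quant-scale.0.0186', pattern: 'quant-scale.#'."""
    prefix = pattern.split('#')[0]           # 'quant-scale.'
    value = tag.lstrip('_')[len(prefix):]    # '0.0186'
    return {k: v.replace('#', value) for k, v in const_env.items()}

print(resolve_wildcard_variation('_quant-scale.0.0186', 'quant-scale.#',
                                 {'CM_DATASET_QUANT_SCALE': '#'}))
# {'CM_DATASET_QUANT_SCALE': '0.0186'}
```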
diff --git a/cm-mlops/script/get-preprocessed-dataset-openimages/customize.py b/cm-mlops/script/get-preprocessed-dataset-openimages/customize.py
index 92ab86fead..fd2adcb5f6 100644
--- a/cm-mlops/script/get-preprocessed-dataset-openimages/customize.py
+++ b/cm-mlops/script/get-preprocessed-dataset-openimages/customize.py
@@ -13,7 +13,7 @@ def preprocess(i):
     if env.get('CM_DATASET_REFERENCE_PREPROCESSOR',"0") == "1":
         print("Using MLCommons Inference source from '" + env['CM_MLPERF_INFERENCE_SOURCE'] +"'")
 
-    if env.get('CM_MODEL_NAME', '') == 'retinanet':
+    if env.get('CM_ML_MODEL_NAME', '') == 'retinanet':
         if env.get('CM_DATASET_QUANTIZE', '') == '1':
             if env.get('CM_QAIC_MODEL_RETINANET_IMAGE_SCALE', '') != '':
                 env['CM_DATASET_QUANT_SCALE'] = env['CM_QAIC_MODEL_RETINANET_IMAGE_SCALE']
diff --git a/cm-mlops/script/get-preprocessed-dataset-openimages/preprocess.py b/cm-mlops/script/get-preprocessed-dataset-openimages/preprocess.py
index 33aee9d6f4..71f7554b32 100644
--- a/cm-mlops/script/get-preprocessed-dataset-openimages/preprocess.py
+++ b/cm-mlops/script/get-preprocessed-dataset-openimages/preprocess.py
@@ -16,7 +16,7 @@
 if os.environ.get('CM_DATASET_REFERENCE_PREPROCESSOR', '1') == "0":
     #import generic_preprocess
     #generic_preprocess.preprocess()
-    import preprocess_image_dataset as pp
+    import preprocess_object_detection_dataset as pp
     pp.preprocess()
 else:
     dataset_list = os.environ.get('CM_DATASET_ANNOTATIONS_FILE_PATH', None)
diff --git a/cm-mlops/script/reproduce-mlperf-inference-qualcomm/README_aws_dl2q.24xlarge.md b/cm-mlops/script/reproduce-mlperf-inference-qualcomm/README_aws_dl2q.24xlarge.md
index 8cf35760aa..e904bacb35 100644
--- a/cm-mlops/script/reproduce-mlperf-inference-qualcomm/README_aws_dl2q.24xlarge.md
+++ b/cm-mlops/script/reproduce-mlperf-inference-qualcomm/README_aws_dl2q.24xlarge.md
@@ -122,5 +122,5 @@
 cm run script --tags=generate-run-cmds,inference,_accuracy-only --device=qaic \
 --adr.mlperf-inference-implementation.tags=_bs.1,_dl2q.24xlarge --execution-mode=valid --quiet
 ```
 
-The expected accuracy is 37.xx
+The expected accuracy is 37.234
diff --git a/cm-mlops/script/reproduce-mlperf-inference-qualcomm/_cm.yaml b/cm-mlops/script/reproduce-mlperf-inference-qualcomm/_cm.yaml
index 9dbb27bec9..601e4e0dc2 100644
--- a/cm-mlops/script/reproduce-mlperf-inference-qualcomm/_cm.yaml
+++ b/cm-mlops/script/reproduce-mlperf-inference-qualcomm/_cm.yaml
@@ -213,6 +213,11 @@ deps:
     - openimages-preprocessed
     - dataset-preprocessed
     tags: get,dataset,preprocessed,openimages,_for.retinanet.onnx,_NCHW,_validation,_custom-annotations
+    update_tags_from_env_with_prefix:
+      _quant-scale.:
+      - CM_QAIC_MODEL_RETINANET_IMAGE_SCALE
+      _quant-offset.:
+      - CM_QAIC_MODEL_RETINANET_IMAGE_OFFSET
     skip_if_env:
       CM_MLPERF_SKIP_RUN:
       - yes
@@ -284,6 +289,9 @@ variations:
       CM_MLPERF_SKIP_RUN:
       - yes
   - tags: get,lib,protobuf,_tag.v3.11.4
+    skip_if_env:
+      CM_MLPERF_SKIP_RUN:
+      - yes
   - tags: set,device,mode,qaic
     enable_if_env:
       CM_QAIC_VC:
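Review note: this wires calibration output into dataset preprocessing end to end. `update_tags_from_env_with_prefix` (handled by the module.py change above) appends `_quant-scale.<value>` and `_quant-offset.<value>` to this dependency's tags, and the new `quant-scale.#`/`quant-offset.#` variations map them back onto `CM_DATASET_QUANT_SCALE`/`CM_DATASET_QUANT_OFFSET`. With hypothetical calibration output:

```python
# Tag expansion for the openimages-preprocessed dependency (values made up).
env = {'CM_QAIC_MODEL_RETINANET_IMAGE_SCALE': '0.0186',
       'CM_QAIC_MODEL_RETINANET_IMAGE_OFFSET': '-114'}
tags = 'get,dataset,preprocessed,openimages,_for.retinanet.onnx,_NCHW,_validation,_custom-annotations'
for prefix, key in [('_quant-scale.', 'CM_QAIC_MODEL_RETINANET_IMAGE_SCALE'),
                    ('_quant-offset.', 'CM_QAIC_MODEL_RETINANET_IMAGE_OFFSET')]:
    if str(env.get(key, '')).strip() != '':
        tags += ',' + prefix + str(env[key])
print(tags)  # ...,_quant-scale.0.0186,_quant-offset.-114
```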
diff --git a/cm-mlops/script/reproduce-mlperf-inference-qualcomm/customize.py b/cm-mlops/script/reproduce-mlperf-inference-qualcomm/customize.py
index 1d2411669c..ae7cc0e40a 100644
--- a/cm-mlops/script/reproduce-mlperf-inference-qualcomm/customize.py
+++ b/cm-mlops/script/reproduce-mlperf-inference-qualcomm/customize.py
@@ -67,16 +67,19 @@ def preprocess(i):
             env['+ CXXFLAGS'].append("-DMODEL_RX50")
 
         keys = [ 'LOC_OFFSET', 'LOC_SCALE', 'CONF_OFFSET', 'CONF_SCALE' ]
-        for i in range(0,4):
-            keys.append(f'LOC_OFFSET_{i}')
-            keys.append(f'LOC_SCALE_{i}')
-            keys.append(f'CONF_OFFSET_{i}')
-            keys.append(f'CONF_SCALE_{i}')
+
+        if env.get('CM_RETINANET_USE_MULTIPLE_SCALES_OFFSETS', '') == 'yes':
+            env['+ CXXFLAGS'].append("-DUSE_MULTIPLE_SCALES_OFFSETS=1")
+            for j in range(0,4):
+                keys.append(f'LOC_OFFSET{j}')
+                keys.append(f'LOC_SCALE{j}')
+                keys.append(f'CONF_OFFSET{j}')
+                keys.append(f'CONF_SCALE{j}')
 
         for key in keys:
-            value = env.get('CM_QAIC_MODEL_RETINANET_'+key)
-            if value:
-                env['+ CXXFLAGS'].append(f" -D{key}={value} ")
+            value = env.get('CM_QAIC_MODEL_RETINANET_'+key, '')
+            if value != '':
+                env['+ CXXFLAGS'].append(f" -D{key}_={value} ")
 
     if env.get('CM_BENCHMARK', '') == 'NETWORK_BERT_SERVER':
         source_files.append(os.path.join(kilt_root, "benchmarks", "network", "bert", "server", "pack.cpp"))
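Review note: the generated compile definitions now carry a trailing underscore (`-DLOC_SCALE0_=...`, `-DCONF_OFFSET_=...`), presumably to match the macro names expected by the KILT harness sources; worth verifying against the C++ side. A sketch of the resulting flags with hypothetical env values:

```python
env = {'CM_QAIC_MODEL_RETINANET_LOC_SCALE0': '0.07',     # hypothetical
       'CM_QAIC_MODEL_RETINANET_CONF_OFFSET0': '-14',    # hypothetical
       'CM_RETINANET_USE_MULTIPLE_SCALES_OFFSETS': 'yes',
       '+ CXXFLAGS': []}

keys = ['LOC_OFFSET', 'LOC_SCALE', 'CONF_OFFSET', 'CONF_SCALE']
if env.get('CM_RETINANET_USE_MULTIPLE_SCALES_OFFSETS', '') == 'yes':
    env['+ CXXFLAGS'].append("-DUSE_MULTIPLE_SCALES_OFFSETS=1")
    for j in range(0, 4):
        keys += [f'LOC_OFFSET{j}', f'LOC_SCALE{j}', f'CONF_OFFSET{j}', f'CONF_SCALE{j}']

for key in keys:
    value = env.get('CM_QAIC_MODEL_RETINANET_' + key, '')
    if value != '':
        env['+ CXXFLAGS'].append(f" -D{key}_={value} ")

print(env['+ CXXFLAGS'])
# ['-DUSE_MULTIPLE_SCALES_OFFSETS=1', ' -DLOC_SCALE0_=0.07 ', ' -DCONF_OFFSET0_=-14 ']
```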