Skip to content

Commit

Permalink
Merge from CTuning (#1049)
Browse files Browse the repository at this point in the history
  • Loading branch information
gfursin authored Jan 8, 2024
2 parents b51bf18 + af871a1 commit 2d0adb0
Show file tree
Hide file tree
Showing 12 changed files with 216 additions and 31 deletions.
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -16,5 +16,5 @@ wheels/
htmlcov
*tmp/
*tmp-ck-*/
cache/
local/cache/

4 changes: 2 additions & 2 deletions cm-mlops/automation/script/module.py
Original file line number Diff line number Diff line change
Expand Up @@ -2462,8 +2462,8 @@ def _run_deps(self, deps, clean_env_keys_deps, env, state, const, const_state, a
update_tags_from_env_with_prefix = d.get("update_tags_from_env_with_prefix", {})
for t in update_tags_from_env_with_prefix:
for key in update_tags_from_env_with_prefix[t]:
if env.get(key, '').strip() != '':
d['tags']+=","+t+env[key]
if str(env.get(key, '')).strip() != '':
d['tags']+=","+t+str(env[key])

for key in clean_env_keys_deps:
if '?' in key or '*' in key:
Expand Down
27 changes: 12 additions & 15 deletions cm-mlops/script/calibrate-model-for.qaic/customize.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ def preprocess(i):
if r['return'] > 0:
return r
cmd = r['cmd']

print("Profiling from "+ os.getcwd())

env['CM_RUN_CMD'] = cmd
Expand Down Expand Up @@ -83,8 +83,10 @@ def postprocess(i):
profile_file_path = os.path.join(os.getcwd(), "profile.yaml")
env['CM_QAIC_MODEL_PROFILE_WITH_PATH'] = profile_file_path

input_layer_name = env.get('CM_ML_MODEL_INPUT_LAYER_NAME', 'images:0')

if env.get('CM_ML_MODEL_INPUT_LAYER_NAME', '') != '':
input_layer_names = [ env.get('CM_ML_MODEL_INPUT_LAYER_NAME') ]
else:
input_layer_names = [ "images:0", "images/:0" ]

output_layer_names_conf = [ [], [] ]
output_layer_names_loc = [ [], [] ]
Expand All @@ -101,7 +103,7 @@ def postprocess(i):
"/TopK/:0",
"/TopK_1/:0",
"/TopK_2/:0",
"/TopK_3/:0"
"/TopK_3/:0",
"/TopK_4/:0"
]

Expand Down Expand Up @@ -133,22 +135,18 @@ def postprocess(i):
if type(doc) == list:

node_names = [ k['NodeOutputName'] for k in doc]
#print(node_names)
oindex = None

for output in output_layer_names_loc:
if output[0] in node_names:
print(output[0])
oindex = output_layer_names_loc.index(output)
print(oindex)
break

if oindex is None:
return {'return': 1, 'error': 'Output node names not found for the given retinanet model'}

for k in doc:
#print(k['NodeOutputName'])
if k["NodeOutputName"] == input_layer_name:
if k["NodeOutputName"] in input_layer_names:
min_val = k['Min']
max_val = k['Max']
scale, offset = get_scale_offset(min_val, max_val)
Expand All @@ -164,8 +162,8 @@ def postprocess(i):
output_max_val_loc = max_val
loc_scale, loc_offset = get_scale_offset(min_val, max_val)
index = output_layer_names_loc[oindex].index(k["NodeOutputName"])
env[f'CM_QAIC_MODEL_RETINANET_LOC_SCALE_{index}'] = loc_scale
env[f'CM_QAIC_MODEL_RETINANET_LOC_OFFSET_{index}'] = loc_offset - 128 # to uint8 is done in NMS code
env[f'CM_QAIC_MODEL_RETINANET_LOC_SCALE{index}'] = loc_scale
env[f'CM_QAIC_MODEL_RETINANET_LOC_OFFSET{index}'] = loc_offset - 128 # to uint8 is done in NMS code

total_range = max_val - min_val
scale = total_range/256.0
Expand All @@ -180,8 +178,8 @@ def postprocess(i):
output_max_val_conf = max_val
conf_scale, conf_offset = get_scale_offset(min_val, max_val)
index = output_layer_names_conf[oindex].index(k["NodeOutputName"])
env[f'CM_QAIC_MODEL_RETINANET_CONF_SCALE_{index}'] = conf_scale
env[f'CM_QAIC_MODEL_RETINANET_CONF_OFFSET_{index}'] = conf_offset - 128 # to uint8 is done in NMS code
env[f'CM_QAIC_MODEL_RETINANET_CONF_SCALE{index}'] = conf_scale
env[f'CM_QAIC_MODEL_RETINANET_CONF_OFFSET{index}'] = conf_offset - 128 # to uint8 is done in NMS code
total_range = max_val - min_val
scale = total_range/256.0
offset = round(-min_val / scale)
Expand All @@ -196,8 +194,7 @@ def postprocess(i):
except yaml.YAMLError as exc:
return {'return': 1, 'error': exc}

print(env)
return {'return':1}
return {'return':0}

def get_scale_offset(min_val, max_val):
total_range = max_val - min_val
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ def preprocess(i):

rerun = True if env.get("CM_RERUN","")!='' else False

env['CM_MLPERF_SKIP_RUN'] = "no"
env['CM_MLPERF_SKIP_RUN'] = env.get('CM_MLPERF_SKIP_RUN', "no")

mlperf_path = env['CM_MLPERF_INFERENCE_SOURCE']
submission_checker_dir = os.path.join(mlperf_path, "tools", "submission")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,7 @@ def preprocess():
interpolation_method = os.getenv('CM_DATASET_INTERPOLATION_METHOD', '')

print(("From: {}, To: {}, Size: {}, Crop: {}, InterSize: {}, 2BGR: {}, OFF: {}, VOL: '{}', FOF: {},"+
" DTYPE: {}, DLAYOUT: {}, EXT: {}, NORM: {}, SMEAN: {}, GCM: {}, QUANTIZE: {}, QUANT_SCALE: {}, QUANT_OFFSET: {}, CONV_UNSIGNED: {}, INTER: {}").format(
" DTYPE: {}, DLAYOUT: {}, EXT: {}, NORM: {}, SMEAN: {}, GCM: {}, GSTD: {}, QUANTIZE: {}, QUANT_SCALE: {}, QUANT_OFFSET: {}, CONV_UNSIGNED: {}, INTER: {}").format(
source_dir, destination_dir, square_side, crop_percentage, inter_size, convert_to_bgr, offset, volume, fof_name,
data_type, data_layout, new_file_extension, normalize_data, subtract_mean, given_channel_means, given_channel_stds, quantize, quant_scale, quant_offset, convert_to_unsigned, interpolation_method) )

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,167 @@
#!/usr/bin/env python3

import os
import json
import numpy as np
from PIL import Image
import torch
import torchvision

SUPPORTED_EXTENSIONS = ['jpeg', 'jpg', 'gif', 'png']

def _channel_tensor(values):
    """Return per-channel values as a 1-D torch tensor, or None if none were given.

    Accepts a whitespace-separated string, a sequence, or a numpy array.
    """
    if values is None:
        return None
    if isinstance(values, str):
        values = np.array(values.split(), dtype=np.float32)
    if len(values) == 0:
        return None
    if isinstance(values, np.ndarray):
        # A reversed view (arr[::-1]) has negative strides, which
        # torch.as_tensor rejects; make it contiguous first.
        values = np.ascontiguousarray(values)
    return torch.as_tensor(values)


def load_image(image_path, target_size, data_type='uint8', convert_to_bgr=False,
               normalize_data=False, normalize_lower=-1, normalize_upper=1,
               subtract_mean=False, given_channel_means='', given_channel_stds='',
               quantize=0, quant_scale=1, quant_offset=0, convert_to_unsigned=0):
    """Load one image, normalize it per channel, resize it and optionally quantize it.

    Args:
        image_path: Path of the image file to open.
        target_size: Side of the square the image is bilinearly resized to.
        convert_to_bgr: When truthy, channels are swapped from RGB to BGR.
        given_channel_means: Per-channel means subtracted from the image
            (array, sequence or whitespace-separated string); empty skips it.
        given_channel_stds: Per-channel divisors applied after the mean
            subtraction; empty skips it.
        quantize: When equal to 1, the result is passed through
            ``quantize_to_uint8`` with ``quant_scale`` and ``quant_offset``.
        data_type, normalize_data, normalize_lower, normalize_upper,
        subtract_mean, convert_to_unsigned: Accepted for signature
            compatibility; not used by this implementation.

    Returns:
        ``(batch_data, original_width, original_height)`` where the two sizes
        are those of the image as stored on disk, before resizing.
    """
    # Pillow cannot convert() to a plain 'BGR' mode, so always decode as RGB
    # and swap the channel axis on the tensor instead.
    with Image.open(image_path) as image_file:
        image = image_file.convert('RGB')

    # Record the real source dimensions before any resizing happens.
    original_width, original_height = image.size

    tensor_image = torchvision.transforms.functional.to_tensor(image)
    if convert_to_bgr:
        tensor_image = tensor_image.flip(0)

    mean = _channel_tensor(given_channel_means)
    std = _channel_tensor(given_channel_stds)
    normalized_image = tensor_image
    if mean is not None:
        normalized_image = normalized_image - mean[:, None, None]
    if std is not None:
        normalized_image = normalized_image / std[:, None, None]

    resized_image = torch.nn.functional.interpolate(normalized_image[None],
                                                    size=(target_size, target_size),
                                                    mode='bilinear')[0].numpy()

    if quantize == 1:
        resized_image = quantize_to_uint8(resized_image, quant_scale, quant_offset)

    # NOTE(review): the data is still channel-first here; reshape only relabels
    # the dimensions and does not transpose. The shape is kept as-is so the
    # returned array and the bytes written by callers stay unchanged.
    batch_shape = (1, target_size, target_size, 3)
    batch_data = resized_image.reshape(batch_shape)

    return batch_data, original_width, original_height

def quantize_to_uint8(image, scale, offset):
    """Quantize *image* to uint8 as ``round(image / scale + offset)`` clipped to [0, 255].

    Args:
        image: Array-like of numeric values.
        scale: Divisor applied to every element.
        offset: Value added after scaling.

    Returns:
        A ``numpy.uint8`` array with the same shape as *image*.
    """
    scaled = np.asarray(image, dtype=np.float64) / scale + offset
    # np.round_ was removed in NumPy 2.0; np.round is the supported spelling.
    rounded = np.round(scaled)
    return np.clip(rounded, 0, 255).astype(np.uint8)

def preprocess_files(selected_filenames, source_dir, destination_dir, square_side,
                     data_type, convert_to_bgr, normalize_data, normalize_lower,
                     normalize_upper, subtract_mean, given_channel_means,
                     given_channel_stds, quantize, quant_scale, quant_offset,
                     convert_to_unsigned, new_file_extension):
    """Preprocess each listed image and dump the result as a raw file.

    Every file name in *selected_filenames* is loaded from *source_dir* through
    ``load_image`` and written to *destination_dir* with ``ndarray.tofile``.
    When *new_file_extension* is non-empty it replaces the text after the last
    dot of the file name.

    Returns:
        A list of ``"<output name>;<width>;<height>"`` strings, one per image,
        using the width and height reported by ``load_image``.
    """
    # Options that are identical for every image are collected once.
    loader_options = dict(
        target_size=square_side,
        data_type=data_type,
        convert_to_bgr=convert_to_bgr,
        normalize_data=normalize_data,
        normalize_lower=normalize_lower,
        normalize_upper=normalize_upper,
        subtract_mean=subtract_mean,
        given_channel_means=given_channel_means,
        given_channel_stds=given_channel_stds,
        quantize=quantize,
        quant_scale=quant_scale,
        quant_offset=quant_offset,
        convert_to_unsigned=convert_to_unsigned,
    )

    signatures = []
    for position, name in enumerate(selected_filenames, start=1):
        pixels, width, height = load_image(
            image_path=os.path.join(source_dir, name), **loader_options)

        if new_file_extension:
            stem = name.rsplit('.', 1)[0]
            stored_name = f"{stem}.{new_file_extension}"
        else:
            stored_name = name

        target_path = os.path.join(destination_dir, stored_name)
        pixels.tofile(target_path)

        print(f"[{position}]: Stored {target_path}")
        signatures.append(f'{stored_name};{width};{height}')

    return signatures

def _parse_channel_values(raw_values, dtype, reverse):
    """Parse a space-separated list of per-channel floats from an env string.

    Returns *raw_values* unchanged when it is empty. Otherwise returns a
    contiguous array cast to *dtype*, reversed when *reverse* is truthy.
    """
    if not raw_values:
        return raw_values
    values = np.fromstring(raw_values, dtype=np.float32, sep=' ').astype(dtype)
    if reverse:
        # Copy so the result has positive strides; a plain [::-1] view cannot
        # be handed to torch.as_tensor.
        values = values[::-1].copy()
    return values


def preprocess():
    """Preprocess a dataset according to ``CM_DATASET_*`` environment variables.

    Reads the configuration from the environment, selects the input files
    (a single ``CM_IMAGE_FILE``, the images named in the annotations file, or
    the supported images found in ``CM_DATASET_PATH``), runs
    ``preprocess_files`` on them and writes one signature per line to the
    ``CM_DATASET_SUBSET_FOF`` file in the destination directory.

    Raises:
        KeyError: If a required environment variable is missing.
        FileNotFoundError: If no annotations are usable and the source
            directory does not exist.
    """
    # Local import: only needed for the missing-source-directory error below.
    import errno

    source_directory = os.environ['CM_DATASET_PATH']
    destination_directory = os.environ['CM_DATASET_PREPROCESSED_PATH']

    intermediate_data_type = os.environ.get('CM_DATASET_INTERMEDIATE_DATA_TYPE', np.float32)
    square_side = int(os.environ['CM_DATASET_INPUT_SQUARE_SIDE'])
    crop_percentage = float(os.environ['CM_DATASET_CROP_FACTOR'])
    inter_size = int(os.getenv('CM_DATASET_INTERMEDIATE_SIZE', 0))
    convert_to_bgr = int(os.getenv('CM_DATASET_CONVERT_TO_BGR', 0))
    offset = int(os.getenv('CM_DATASET_SUBSET_OFFSET', 0))
    volume = int(os.environ['CM_DATASET_SIZE'])
    fof_name = os.getenv('CM_DATASET_SUBSET_FOF', 'files.txt')
    data_type = os.getenv('CM_DATASET_DATA_TYPE_INPUT', 'float32')
    data_layout = os.getenv('CM_DATASET_DATA_LAYOUT', '').lower()
    new_file_extension = os.getenv('CM_DATASET_PREPROCESSED_EXTENSION', '')
    normalize_data = int(os.getenv('CM_DATASET_NORMALIZE_DATA', '0'))
    subtract_mean = int(os.getenv('CM_DATASET_SUBTRACT_MEANS', '0'))
    quant_scale = float(os.environ['CM_DATASET_QUANT_SCALE'])
    quant_offset = float(os.environ['CM_DATASET_QUANT_OFFSET'])
    quantize = int(os.environ['CM_DATASET_QUANTIZE'])  # 1 for quantize to int8
    convert_to_unsigned = int(os.environ['CM_DATASET_CONVERT_TO_UNSIGNED'])  # 1 for int8 to uint8

    interpolation_method = os.getenv('CM_DATASET_INTERPOLATION_METHOD', '')

    # The path is tested for truthiness below, so an unset variable is treated
    # as "no annotations" instead of raising KeyError.
    annotations_filepath = os.getenv('CM_DATASET_ANNOTATIONS_FILE_PATH', '')
    is_calibration = os.environ['CM_DATASET_TYPE'] == "calibration"
    image_file = os.getenv('CM_IMAGE_FILE', '')

    normalize_lower = float(os.getenv('CM_DATASET_NORMALIZE_LOWER', -1.0))
    normalize_upper = float(os.getenv('CM_DATASET_NORMALIZE_UPPER', 1.0))

    given_channel_means = _parse_channel_values(
        os.getenv('CM_DATASET_GIVEN_CHANNEL_MEANS', ''), intermediate_data_type, convert_to_bgr)
    given_channel_stds = _parse_channel_values(
        os.getenv('CM_DATASET_GIVEN_CHANNEL_STDS', ''), intermediate_data_type, convert_to_bgr)

    print(f"From: {source_directory}, To: {destination_directory}, Size: {square_side}, Crop: {crop_percentage}, InterSize: {inter_size}, 2BGR: {convert_to_bgr}, " +
          f"OFF: {offset}, VOL: '{volume}', FOF: {fof_name}, DTYPE: {data_type}, DLAYOUT: {data_layout}, EXT: {new_file_extension}, " +
          f"NORM: {normalize_data}, SMEAN: {subtract_mean}, GCM: {given_channel_means}, GSTD: {given_channel_stds}, QUANTIZE: {quantize}, QUANT_SCALE: {quant_scale}, " +
          f"QUANT_OFFSET: {quant_offset}, CONV_UNSIGNED: {convert_to_unsigned}, INTER: {interpolation_method}")

    if image_file:
        # A single explicitly named image overrides the dataset selection.
        source_directory = os.path.dirname(image_file)
        selected_filenames = [os.path.basename(image_file)]
    else:
        if annotations_filepath and not is_calibration:
            with open(annotations_filepath, "r") as annotations_fh:
                annotations_struct = json.load(annotations_fh)
            ordered_filenames = [image_entry['file_name'] for image_entry in annotations_struct['images']]
        elif os.path.isdir(source_directory):
            ordered_filenames = [filename for filename in sorted(os.listdir(source_directory))
                                 if any(filename.lower().endswith(extension) for extension in SUPPORTED_EXTENSIONS)]
        else:
            raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), source_directory)

        total_volume = len(ordered_filenames)

        # A negative offset counts from the end of the list.
        if offset < 0:
            offset += total_volume

        # A volume of 0 means "everything from the offset onwards".
        if not volume:
            volume = total_volume - offset

        selected_filenames = ordered_filenames[offset:offset + volume]

    output_signatures = preprocess_files(selected_filenames, source_directory, destination_directory, square_side, data_type,
                                         convert_to_bgr, normalize_data, normalize_lower, normalize_upper,
                                         subtract_mean, given_channel_means, given_channel_stds, quantize,
                                         quant_scale, quant_offset, convert_to_unsigned, new_file_extension)

    fof_full_path = os.path.join(destination_directory, fof_name)
    with open(fof_full_path, 'w') as fof_file:
        for filename in output_signatures:
            fof_file.write(f'{filename}\n')


if __name__ == "__main__":
    preprocess()

10 changes: 10 additions & 0 deletions cm-mlops/script/get-preprocessed-dataset-openimages/_cm.json
Original file line number Diff line number Diff line change
Expand Up @@ -246,6 +246,16 @@
"CM_DATASET_QUANT_OFFSET": "114"
}
},
"quant-scale.#": {
"const": {
"CM_DATASET_QUANT_SCALE": "#"
}
},
"quant-offset.#": {
"const": {
"CM_DATASET_QUANT_OFFSET": "#"
}
},
"inter.linear": {
"group": "interpolation-method",
"env": {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ def preprocess(i):
if env.get('CM_DATASET_REFERENCE_PREPROCESSOR',"0") == "1":
print("Using MLCommons Inference source from '" + env['CM_MLPERF_INFERENCE_SOURCE'] +"'")

if env.get('CM_MODEL_NAME', '') == 'retinanet':
if env.get('CM_ML_MODEL_NAME', '') == 'retinanet':
if env.get('CM_DATASET_QUANTIZE', '') == '1':
if env.get('CM_QAIC_MODEL_RETINANET_IMAGE_SCALE', '') != '':
env['CM_DATASET_QUANT_SCALE'] = env['CM_QAIC_MODEL_RETINANET_IMAGE_SCALE']
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
if os.environ.get('CM_DATASET_REFERENCE_PREPROCESSOR', '1') == "0":
#import generic_preprocess
#generic_preprocess.preprocess()
import preprocess_image_dataset as pp
import preprocess_object_detection_dataset as pp
pp.preprocess()
else:
dataset_list = os.environ.get('CM_DATASET_ANNOTATIONS_FILE_PATH', None)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -122,5 +122,5 @@ cm run script --tags=generate-run-cmds,inference,_accuracy-only --device=qaic --
--adr.mlperf-inference-implementation.tags=_bs.1,_dl2q.24xlarge --execution-mode=valid --quiet
```

The expected accuracy is 37.xx
The expected accuracy is 37.234

8 changes: 8 additions & 0 deletions cm-mlops/script/reproduce-mlperf-inference-qualcomm/_cm.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,11 @@ deps:
- openimages-preprocessed
- dataset-preprocessed
tags: get,dataset,preprocessed,openimages,_for.retinanet.onnx,_NCHW,_validation,_custom-annotations
# Each env value is appended to its tag prefix, so the scale tag must be fed
# from the IMAGE_SCALE key and the offset tag from the IMAGE_OFFSET key.
update_tags_from_env_with_prefix:
  _quant-scale.:
    - CM_QAIC_MODEL_RETINANET_IMAGE_SCALE
  _quant-offset.:
    - CM_QAIC_MODEL_RETINANET_IMAGE_OFFSET
skip_if_env:
CM_MLPERF_SKIP_RUN:
- yes
Expand Down Expand Up @@ -284,6 +289,9 @@ variations:
CM_MLPERF_SKIP_RUN:
- yes
- tags: get,lib,protobuf,_tag.v3.11.4
skip_if_env:
CM_MLPERF_SKIP_RUN:
- yes
- tags: set,device,mode,qaic
enable_if_env:
CM_QAIC_VC:
Expand Down
19 changes: 11 additions & 8 deletions cm-mlops/script/reproduce-mlperf-inference-qualcomm/customize.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,16 +67,19 @@ def preprocess(i):
env['+ CXXFLAGS'].append("-DMODEL_RX50")

keys = [ 'LOC_OFFSET', 'LOC_SCALE', 'CONF_OFFSET', 'CONF_SCALE' ]
for i in range(0,4):
keys.append(f'LOC_OFFSET_{i}')
keys.append(f'LOC_SCALE_{i}')
keys.append(f'CONF_OFFSET_{i}')
keys.append(f'CONF_SCALE_{i}')

if env.get('CM_RETINANET_USE_MULTIPLE_SCALES_OFFSETS', '') == 'yes':
env['+ CXXFLAGS'].append("-DUSE_MULTIPLE_SCALES_OFFSETS=1")
for j in range(0,4):
keys.append(f'LOC_OFFSET{j}')
keys.append(f'LOC_SCALE{j}')
keys.append(f'CONF_OFFSET{j}')
keys.append(f'CONF_SCALE{j}')

for key in keys:
value = env.get('CM_QAIC_MODEL_RETINANET_'+key)
if value:
env['+ CXXFLAGS'].append(f" -D{key}={value} ")
value = env.get('CM_QAIC_MODEL_RETINANET_'+key, '')
if value != '':
env['+ CXXFLAGS'].append(f" -D{key}_={value} ")

if env.get('CM_BENCHMARK', '') == 'NETWORK_BERT_SERVER':
source_files.append(os.path.join(kilt_root, "benchmarks", "network", "bert", "server", "pack.cpp"))
Expand Down

0 comments on commit 2d0adb0

Please sign in to comment.