Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merge from CTuning #1049

Merged
merged 10 commits into from
Jan 8, 2024
Merged
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -16,5 +16,5 @@ wheels/
htmlcov
*tmp/
*tmp-ck-*/
cache/
local/cache/

4 changes: 2 additions & 2 deletions cm-mlops/automation/script/module.py
Original file line number Diff line number Diff line change
Expand Up @@ -2462,8 +2462,8 @@ def _run_deps(self, deps, clean_env_keys_deps, env, state, const, const_state, a
update_tags_from_env_with_prefix = d.get("update_tags_from_env_with_prefix", {})
for t in update_tags_from_env_with_prefix:
for key in update_tags_from_env_with_prefix[t]:
if env.get(key, '').strip() != '':
d['tags']+=","+t+env[key]
if str(env.get(key, '')).strip() != '':
d['tags']+=","+t+str(env[key])

for key in clean_env_keys_deps:
if '?' in key or '*' in key:
Expand Down
27 changes: 12 additions & 15 deletions cm-mlops/script/calibrate-model-for.qaic/customize.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ def preprocess(i):
if r['return'] > 0:
return r
cmd = r['cmd']

print("Profiling from "+ os.getcwd())

env['CM_RUN_CMD'] = cmd
Expand Down Expand Up @@ -83,8 +83,10 @@ def postprocess(i):
profile_file_path = os.path.join(os.getcwd(), "profile.yaml")
env['CM_QAIC_MODEL_PROFILE_WITH_PATH'] = profile_file_path

input_layer_name = env.get('CM_ML_MODEL_INPUT_LAYER_NAME', 'images:0')

if env.get('CM_ML_MODEL_INPUT_LAYER_NAME', '') != '':
input_layer_names = [ env.get('CM_ML_MODEL_INPUT_LAYER_NAME') ]
else:
input_layer_names = [ "images:0", "images/:0" ]

output_layer_names_conf = [ [], [] ]
output_layer_names_loc = [ [], [] ]
Expand All @@ -101,7 +103,7 @@ def postprocess(i):
"/TopK/:0",
"/TopK_1/:0",
"/TopK_2/:0",
"/TopK_3/:0"
"/TopK_3/:0",
"/TopK_4/:0"
]

Expand Down Expand Up @@ -133,22 +135,18 @@ def postprocess(i):
if type(doc) == list:

node_names = [ k['NodeOutputName'] for k in doc]
#print(node_names)
oindex = None

for output in output_layer_names_loc:
if output[0] in node_names:
print(output[0])
oindex = output_layer_names_loc.index(output)
print(oindex)
break

if oindex is None:
return {'return': 1, 'error': 'Output node names not found for the given retinanet model'}

for k in doc:
#print(k['NodeOutputName'])
if k["NodeOutputName"] == input_layer_name:
if k["NodeOutputName"] in input_layer_names:
min_val = k['Min']
max_val = k['Max']
scale, offset = get_scale_offset(min_val, max_val)
Expand All @@ -164,8 +162,8 @@ def postprocess(i):
output_max_val_loc = max_val
loc_scale, loc_offset = get_scale_offset(min_val, max_val)
index = output_layer_names_loc[oindex].index(k["NodeOutputName"])
env[f'CM_QAIC_MODEL_RETINANET_LOC_SCALE_{index}'] = loc_scale
env[f'CM_QAIC_MODEL_RETINANET_LOC_OFFSET_{index}'] = loc_offset - 128 # to uint8 is done in NMS code
env[f'CM_QAIC_MODEL_RETINANET_LOC_SCALE{index}'] = loc_scale
env[f'CM_QAIC_MODEL_RETINANET_LOC_OFFSET{index}'] = loc_offset - 128 # to uint8 is done in NMS code

total_range = max_val - min_val
scale = total_range/256.0
Expand All @@ -180,8 +178,8 @@ def postprocess(i):
output_max_val_conf = max_val
conf_scale, conf_offset = get_scale_offset(min_val, max_val)
index = output_layer_names_conf[oindex].index(k["NodeOutputName"])
env[f'CM_QAIC_MODEL_RETINANET_CONF_SCALE_{index}'] = conf_scale
env[f'CM_QAIC_MODEL_RETINANET_CONF_OFFSET_{index}'] = conf_offset - 128 # to uint8 is done in NMS code
env[f'CM_QAIC_MODEL_RETINANET_CONF_SCALE{index}'] = conf_scale
env[f'CM_QAIC_MODEL_RETINANET_CONF_OFFSET{index}'] = conf_offset - 128 # to uint8 is done in NMS code
total_range = max_val - min_val
scale = total_range/256.0
offset = round(-min_val / scale)
Expand All @@ -196,8 +194,7 @@ def postprocess(i):
except yaml.YAMLError as exc:
return {'return': 1, 'error': exc}

print(env)
return {'return':1}
return {'return':0}

def get_scale_offset(min_val, max_val):
total_range = max_val - min_val
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ def preprocess(i):

rerun = True if env.get("CM_RERUN","")!='' else False

env['CM_MLPERF_SKIP_RUN'] = "no"
env['CM_MLPERF_SKIP_RUN'] = env.get('CM_MLPERF_SKIP_RUN', "no")

mlperf_path = env['CM_MLPERF_INFERENCE_SOURCE']
submission_checker_dir = os.path.join(mlperf_path, "tools", "submission")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,7 @@ def preprocess():
interpolation_method = os.getenv('CM_DATASET_INTERPOLATION_METHOD', '')

print(("From: {}, To: {}, Size: {}, Crop: {}, InterSize: {}, 2BGR: {}, OFF: {}, VOL: '{}', FOF: {},"+
" DTYPE: {}, DLAYOUT: {}, EXT: {}, NORM: {}, SMEAN: {}, GCM: {}, QUANTIZE: {}, QUANT_SCALE: {}, QUANT_OFFSET: {}, CONV_UNSIGNED: {}, INTER: {}").format(
" DTYPE: {}, DLAYOUT: {}, EXT: {}, NORM: {}, SMEAN: {}, GCM: {}, GSTD: {}, QUANTIZE: {}, QUANT_SCALE: {}, QUANT_OFFSET: {}, CONV_UNSIGNED: {}, INTER: {}").format(
source_dir, destination_dir, square_side, crop_percentage, inter_size, convert_to_bgr, offset, volume, fof_name,
data_type, data_layout, new_file_extension, normalize_data, subtract_mean, given_channel_means, given_channel_stds, quantize, quant_scale, quant_offset, convert_to_unsigned, interpolation_method) )

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,167 @@
#!/usr/bin/env python3

import os
import json
import numpy as np
from PIL import Image
import torch
import torchvision

SUPPORTED_EXTENSIONS = ['jpeg', 'jpg', 'gif', 'png']

def _as_channel_tensor(values):
    """Convert per-channel statistics to a 1-D tensor, or None when nothing was given."""
    if values is None:
        return None
    if isinstance(values, str):
        # Accept the raw space-separated form as well; an empty string means "not provided".
        values = np.array(values.split(), dtype=np.float32)
    if np.size(values) == 0:
        return None
    if isinstance(values, np.ndarray):
        # Reversed views (values[::-1]) have negative strides, which torch cannot wrap.
        values = np.ascontiguousarray(values)
    return torch.as_tensor(values)


def load_image(image_path, target_size, data_type='uint8', convert_to_bgr=False,
               normalize_data=False, normalize_lower=-1, normalize_upper=1,
               subtract_mean=False, given_channel_means='', given_channel_stds='',
               quantize=0, quant_scale=1, quant_offset=0, convert_to_unsigned=0):
    """Load one image, normalize it per channel, resize it and optionally quantize it.

    Args:
        image_path: Path of the image file to read.
        target_size: Side length of the square the image is resized to.
        convert_to_bgr: When truthy, channels are reordered from RGB to BGR.
        given_channel_means: Per-channel means subtracted from the image
            (sequence, array or space-separated string). Empty means "skip".
        given_channel_stds: Per-channel divisors applied after the mean
            subtraction. Empty means "skip".
        quantize: When equal to 1 the resized image is passed through
            ``quantize_to_uint8`` with ``quant_scale`` and ``quant_offset``.
        data_type, normalize_data, normalize_lower, normalize_upper,
        subtract_mean, convert_to_unsigned: Accepted so the signature stays
            compatible with callers; this implementation does not read them.

    Returns:
        A tuple ``(batch_data, original_width, original_height)`` where
        ``batch_data`` is a NumPy array reshaped to
        ``(1, target_size, target_size, 3)`` and the two sizes are the
        dimensions of the image as stored on disk, before resizing.
    """
    # Pillow cannot convert() to a plain 'BGR' mode, so always decode as RGB
    # and swap the channel order on the tensor instead.
    with Image.open(image_path) as source_image:
        image = source_image.convert('RGB')

    # Must be taken before resizing; PIL reports size as (width, height).
    original_width, original_height = image.size

    tensor_image = torchvision.transforms.functional.to_tensor(image)
    if convert_to_bgr:
        tensor_image = tensor_image.flip(0)

    mean = _as_channel_tensor(given_channel_means)
    std = _as_channel_tensor(given_channel_stds)
    normalized_image = tensor_image
    if mean is not None:
        normalized_image = normalized_image - mean[:, None, None]
    if std is not None:
        normalized_image = normalized_image / std[:, None, None]

    resized_image = torch.nn.functional.interpolate(normalized_image[None],
                                                    size=(target_size, target_size),
                                                    mode='bilinear')[0].numpy()

    if quantize == 1:
        resized_image = quantize_to_uint8(resized_image, quant_scale, quant_offset)

    # NOTE: this is a reshape, not a transpose -- the element order in memory
    # stays channel-first as produced by interpolate(); only the reported
    # shape changes. Kept as-is so the returned shape and the bytes written by
    # callers via tofile() are unchanged.
    batch_shape = (1, target_size, target_size, 3)
    batch_data = resized_image.reshape(batch_shape)

    return batch_data, original_width, original_height

def quantize_to_uint8(image, scale, offset):
    """Quantize an array to uint8 as ``clip(round(image / scale + offset), 0, 255)``.

    Args:
        image: Array-like of numeric values.
        scale: Divisor applied to every element.
        offset: Value added after the division.

    Returns:
        A ``numpy.uint8`` array with the same shape as ``image``.
    """
    # Compute in float64 so the division/offset is not done in a narrow dtype.
    scaled = np.asarray(image, dtype=np.float64) / scale + offset
    # np.rint replaces np.round_, which was removed in NumPy 2.0; both round
    # half-way cases to the nearest even value.
    return np.clip(np.rint(scaled), 0, 255).astype(np.uint8)

def preprocess_files(selected_filenames, source_dir, destination_dir, square_side,
                     data_type, convert_to_bgr, normalize_data, normalize_lower,
                     normalize_upper, subtract_mean, given_channel_means,
                     given_channel_stds, quantize, quant_scale, quant_offset,
                     convert_to_unsigned, new_file_extension):
    """Preprocess every listed image and store the raw result in ``destination_dir``.

    Each file name in ``selected_filenames`` is resolved against
    ``source_dir``, run through ``load_image`` and dumped with ``tofile``.
    When ``new_file_extension`` is non-empty it replaces the part of the name
    after the last dot; otherwise the input name is reused.

    Returns:
        A list of ``"<output name>;<width>;<height>"`` strings, one per
        processed file, in input order.
    """
    # Options that are identical for every image are collected once.
    shared_options = {
        'target_size': square_side,
        'data_type': data_type,
        'convert_to_bgr': convert_to_bgr,
        'normalize_data': normalize_data,
        'normalize_lower': normalize_lower,
        'normalize_upper': normalize_upper,
        'subtract_mean': subtract_mean,
        'given_channel_means': given_channel_means,
        'given_channel_stds': given_channel_stds,
        'quantize': quantize,
        'quant_scale': quant_scale,
        'quant_offset': quant_offset,
        'convert_to_unsigned': convert_to_unsigned,
    }

    signatures = []
    for position, name in enumerate(selected_filenames, start=1):
        pixels, width, height = load_image(
            image_path=os.path.join(source_dir, name), **shared_options)

        if new_file_extension:
            stem = name.rsplit('.', 1)[0]
            stored_name = f"{stem}.{new_file_extension}"
        else:
            stored_name = name

        stored_path = os.path.join(destination_dir, stored_name)
        pixels.tofile(stored_path)

        print(f"[{position}]: Stored {stored_path}")
        signatures.append(f'{stored_name};{width};{height}')

    return signatures

def _parse_channel_values(raw_values, dtype, reverse):
    """Parse a space-separated list of per-channel values; empty input is returned unchanged."""
    if not raw_values:
        return raw_values
    values = np.fromstring(raw_values, dtype=np.float32, sep=' ').astype(dtype)
    if reverse:
        # Copy into a contiguous array: a bare [::-1] view has negative
        # strides, which torch.as_tensor cannot consume downstream.
        values = np.ascontiguousarray(values[::-1])
    return values


def preprocess():
    """Preprocess a dataset subset as configured through ``CM_*`` environment variables.

    Required variables: ``CM_DATASET_PATH``, ``CM_DATASET_PREPROCESSED_PATH``,
    ``CM_DATASET_INPUT_SQUARE_SIDE``, ``CM_DATASET_CROP_FACTOR``,
    ``CM_DATASET_SIZE``, ``CM_DATASET_QUANT_SCALE``,
    ``CM_DATASET_QUANT_OFFSET``, ``CM_DATASET_QUANTIZE``,
    ``CM_DATASET_CONVERT_TO_UNSIGNED`` and ``CM_DATASET_TYPE``. All other
    variables read below have defaults.

    The list of images is taken, in this order of precedence, from
    ``CM_IMAGE_FILE`` (a single image), from the ``images`` entries of the
    annotations JSON (unless the dataset type is ``calibration``), or from the
    supported image files found in the source directory. The selected files
    are handed to ``preprocess_files`` and the returned signatures are written
    one per line to the file named by ``CM_DATASET_SUBSET_FOF`` inside the
    destination directory.

    Raises:
        KeyError: A required environment variable is missing.
        FileNotFoundError: No annotations are usable and the source directory
            does not exist.
    """
    # Imported locally because the module header does not import errno.
    import errno

    source_directory = os.environ['CM_DATASET_PATH']
    destination_directory = os.environ['CM_DATASET_PREPROCESSED_PATH']

    intermediate_data_type = os.environ.get('CM_DATASET_INTERMEDIATE_DATA_TYPE', np.float32)
    square_side = int(os.environ['CM_DATASET_INPUT_SQUARE_SIDE'])
    crop_percentage = float(os.environ['CM_DATASET_CROP_FACTOR'])
    inter_size = int(os.getenv('CM_DATASET_INTERMEDIATE_SIZE', 0))
    convert_to_bgr = int(os.getenv('CM_DATASET_CONVERT_TO_BGR', 0))
    offset = int(os.getenv('CM_DATASET_SUBSET_OFFSET', 0))
    volume = int(os.environ['CM_DATASET_SIZE'])
    fof_name = os.getenv('CM_DATASET_SUBSET_FOF', 'files.txt')
    data_type = os.getenv('CM_DATASET_DATA_TYPE_INPUT', 'float32')
    data_layout = os.getenv('CM_DATASET_DATA_LAYOUT', '').lower()
    new_file_extension = os.getenv('CM_DATASET_PREPROCESSED_EXTENSION', '')
    normalize_data = int(os.getenv('CM_DATASET_NORMALIZE_DATA', '0'))
    subtract_mean = int(os.getenv('CM_DATASET_SUBTRACT_MEANS', '0'))
    quant_scale = float(os.environ['CM_DATASET_QUANT_SCALE'])
    quant_offset = float(os.environ['CM_DATASET_QUANT_OFFSET'])
    quantize = int(os.environ['CM_DATASET_QUANTIZE'])  # 1 for quantize to int8
    convert_to_unsigned = int(os.environ['CM_DATASET_CONVERT_TO_UNSIGNED'])  # 1 for int8 to uint8

    interpolation_method = os.getenv('CM_DATASET_INTERPOLATION_METHOD', '')

    # The code below already treats an empty annotations path as "not
    # provided", so a missing variable is handled the same way.
    annotations_filepath = os.getenv('CM_DATASET_ANNOTATIONS_FILE_PATH', '')
    is_calibration = os.environ['CM_DATASET_TYPE'] == "calibration"
    image_file = os.getenv('CM_IMAGE_FILE', '')

    normalize_lower = float(os.getenv('CM_DATASET_NORMALIZE_LOWER', -1.0))
    normalize_upper = float(os.getenv('CM_DATASET_NORMALIZE_UPPER', 1.0))

    # Channel statistics are reversed together with the channel order.
    given_channel_means = _parse_channel_values(
        os.getenv('CM_DATASET_GIVEN_CHANNEL_MEANS', ''), intermediate_data_type, convert_to_bgr)
    given_channel_stds = _parse_channel_values(
        os.getenv('CM_DATASET_GIVEN_CHANNEL_STDS', ''), intermediate_data_type, convert_to_bgr)

    print(f"From: {source_directory}, To: {destination_directory}, Size: {square_side}, Crop: {crop_percentage}, InterSize: {inter_size}, 2BGR: {convert_to_bgr}, " +
          f"OFF: {offset}, VOL: '{volume}', FOF: {fof_name}, DTYPE: {data_type}, DLAYOUT: {data_layout}, EXT: {new_file_extension}, " +
          f"NORM: {normalize_data}, SMEAN: {subtract_mean}, GCM: {given_channel_means}, GSTD: {given_channel_stds}, QUANTIZE: {quantize}, QUANT_SCALE: {quant_scale}, " +
          f"QUANT_OFFSET: {quant_offset}, CONV_UNSIGNED: {convert_to_unsigned}, INTER: {interpolation_method}")

    if image_file:
        # Single-image mode: the image's own directory becomes the source.
        source_directory = os.path.dirname(image_file)
        selected_filenames = [os.path.basename(image_file)]
    else:
        if annotations_filepath and not is_calibration:
            with open(annotations_filepath, "r") as annotations_fh:
                annotations_struct = json.load(annotations_fh)
            ordered_filenames = [image_entry['file_name'] for image_entry in annotations_struct['images']]
        elif os.path.isdir(source_directory):
            ordered_filenames = [filename for filename in sorted(os.listdir(source_directory))
                                 if any(filename.lower().endswith(extension) for extension in SUPPORTED_EXTENSIONS)]
        else:
            raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), source_directory)

        total_volume = len(ordered_filenames)

        # A negative offset counts from the end of the list.
        if offset < 0:
            offset += total_volume

        # A volume of 0 means "everything from the offset onwards".
        if not volume:
            volume = total_volume - offset

        selected_filenames = ordered_filenames[offset:offset + volume]

    output_signatures = preprocess_files(selected_filenames, source_directory, destination_directory, square_side, data_type,
                                         convert_to_bgr, normalize_data, normalize_lower, normalize_upper,
                                         subtract_mean, given_channel_means, given_channel_stds, quantize,
                                         quant_scale, quant_offset, convert_to_unsigned, new_file_extension)

    fof_full_path = os.path.join(destination_directory, fof_name)
    with open(fof_full_path, 'w') as fof_file:
        fof_file.writelines(f'{signature}\n' for signature in output_signatures)

# Script entry point: run the preprocessing only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    preprocess()

10 changes: 10 additions & 0 deletions cm-mlops/script/get-preprocessed-dataset-openimages/_cm.json
Original file line number Diff line number Diff line change
Expand Up @@ -246,6 +246,16 @@
"CM_DATASET_QUANT_OFFSET": "114"
}
},
"quant-scale.#": {
"const": {
"CM_DATASET_QUANT_SCALE": "#"
}
},
"quant-offset.#": {
"const": {
"CM_DATASET_QUANT_OFFSET": "#"
}
},
"inter.linear": {
"group": "interpolation-method",
"env": {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ def preprocess(i):
if env.get('CM_DATASET_REFERENCE_PREPROCESSOR',"0") == "1":
print("Using MLCommons Inference source from '" + env['CM_MLPERF_INFERENCE_SOURCE'] +"'")

if env.get('CM_MODEL_NAME', '') == 'retinanet':
if env.get('CM_ML_MODEL_NAME', '') == 'retinanet':
if env.get('CM_DATASET_QUANTIZE', '') == '1':
if env.get('CM_QAIC_MODEL_RETINANET_IMAGE_SCALE', '') != '':
env['CM_DATASET_QUANT_SCALE'] = env['CM_QAIC_MODEL_RETINANET_IMAGE_SCALE']
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
if os.environ.get('CM_DATASET_REFERENCE_PREPROCESSOR', '1') == "0":
#import generic_preprocess
#generic_preprocess.preprocess()
import preprocess_image_dataset as pp
import preprocess_object_detection_dataset as pp
pp.preprocess()
else:
dataset_list = os.environ.get('CM_DATASET_ANNOTATIONS_FILE_PATH', None)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -122,5 +122,5 @@ cm run script --tags=generate-run-cmds,inference,_accuracy-only --device=qaic --
--adr.mlperf-inference-implementation.tags=_bs.1,_dl2q.24xlarge --execution-mode=valid --quiet
```

The expected accuracy is 37.xx
The expected accuracy is 37.234

Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,11 @@ deps:
- openimages-preprocessed
- dataset-preprocessed
tags: get,dataset,preprocessed,openimages,_for.retinanet.onnx,_NCHW,_validation,_custom-annotations
update_tags_from_env_with_prefix:
  # The value of each listed variable is appended to the tag prefix, and the
  # resulting "_quant-scale.<value>" / "_quant-offset.<value>" variations set
  # CM_DATASET_QUANT_SCALE / CM_DATASET_QUANT_OFFSET respectively, so every
  # prefix must be paired with the calibration value of the same kind.
  _quant-scale.:
    - CM_QAIC_MODEL_RETINANET_IMAGE_SCALE
  _quant-offset.:
    - CM_QAIC_MODEL_RETINANET_IMAGE_OFFSET
skip_if_env:
CM_MLPERF_SKIP_RUN:
- yes
Expand Down Expand Up @@ -284,6 +289,9 @@ variations:
CM_MLPERF_SKIP_RUN:
- yes
- tags: get,lib,protobuf,_tag.v3.11.4
skip_if_env:
CM_MLPERF_SKIP_RUN:
- yes
- tags: set,device,mode,qaic
enable_if_env:
CM_QAIC_VC:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -67,16 +67,19 @@ def preprocess(i):
env['+ CXXFLAGS'].append("-DMODEL_RX50")

keys = [ 'LOC_OFFSET', 'LOC_SCALE', 'CONF_OFFSET', 'CONF_SCALE' ]
for i in range(0,4):
keys.append(f'LOC_OFFSET_{i}')
keys.append(f'LOC_SCALE_{i}')
keys.append(f'CONF_OFFSET_{i}')
keys.append(f'CONF_SCALE_{i}')

if env.get('CM_RETINANET_USE_MULTIPLE_SCALES_OFFSETS', '') == 'yes':
env['+ CXXFLAGS'].append("-DUSE_MULTIPLE_SCALES_OFFSETS=1")
for j in range(0,4):
keys.append(f'LOC_OFFSET{j}')
keys.append(f'LOC_SCALE{j}')
keys.append(f'CONF_OFFSET{j}')
keys.append(f'CONF_SCALE{j}')

for key in keys:
value = env.get('CM_QAIC_MODEL_RETINANET_'+key)
if value:
env['+ CXXFLAGS'].append(f" -D{key}={value} ")
value = env.get('CM_QAIC_MODEL_RETINANET_'+key, '')
if value != '':
env['+ CXXFLAGS'].append(f" -D{key}_={value} ")

if env.get('CM_BENCHMARK', '') == 'NETWORK_BERT_SERVER':
source_files.append(os.path.join(kilt_root, "benchmarks", "network", "bert", "server", "pack.cpp"))
Expand Down