From 0609ad47388abba37d074736014a239d0839a951 Mon Sep 17 00:00:00 2001
From: Eugene Fedorenko
Date: Thu, 30 Apr 2020 14:07:38 -0700
Subject: [PATCH] Fixes. End-to-End Pipeline Example on Azure (#788)

* Fixes

* Fixes
---
 .../azurepipeline/code/deploy/Dockerfile      |   0
 pipelines/azurepipeline/code/deploy/score.py  | 115 +++---
 .../azurepipeline/code/preprocess/Dockerfile  |   0
 .../azurepipeline/code/preprocess/data.py     | 188 +++------
 .../azurepipeline/code/register/Dockerfile    |   0
 .../azurepipeline/code/register/register.py   | 166 +++-----
 .../azurepipeline/code/training/Dockerfile    |   0
 .../azurepipeline/code/training/train.py      | 301 +++++++++---------
 8 files changed, 397 insertions(+), 373 deletions(-)
 mode change 100644 => 100755 pipelines/azurepipeline/code/deploy/Dockerfile
 mode change 100644 => 100755 pipelines/azurepipeline/code/deploy/score.py
 mode change 100644 => 100755 pipelines/azurepipeline/code/preprocess/Dockerfile
 mode change 100644 => 100755 pipelines/azurepipeline/code/preprocess/data.py
 mode change 100644 => 100755 pipelines/azurepipeline/code/register/Dockerfile
 mode change 100644 => 100755 pipelines/azurepipeline/code/register/register.py
 mode change 100644 => 100755 pipelines/azurepipeline/code/training/Dockerfile
 mode change 100644 => 100755 pipelines/azurepipeline/code/training/train.py

diff --git a/pipelines/azurepipeline/code/deploy/Dockerfile b/pipelines/azurepipeline/code/deploy/Dockerfile
old mode 100644
new mode 100755
diff --git a/pipelines/azurepipeline/code/deploy/score.py b/pipelines/azurepipeline/code/deploy/score.py
old mode 100644
new mode 100755
index bafdcf4a8..da092dce7
--- a/pipelines/azurepipeline/code/deploy/score.py
+++ b/pipelines/azurepipeline/code/deploy/score.py
@@ -11,83 +11,84 @@
 def init():
-  if Model.get_model_path('tacosandburritos'):
-    model_path = Model.get_model_path('tacosandburritos')
-  else:
-    model_path = '/model/latest.h5'
+    global model
+    if Model.get_model_path('tacosandburritos'):
+        model_path = Model.get_model_path('tacosandburritos')
+    else:
+        model_path = '/model/latest.h5'
 
-  print('Attempting to load model')
-  model = tf.keras.models.load_model(model_path)
-  model.summary()
-  print('Done!')
+    print('Attempting to load model')
+    model = tf.keras.models.load_model(model_path)
+    model.summary()
+    print('Done!')
 
-  print('Initialized model "{}" at {}'.format(model_path, datetime.datetime.now()))
-  return model
+    print('Initialized model "{}" at {}'.format(
+        model_path, datetime.datetime.now()))
 
 
-def run(raw_data, model):
-  prev_time = time.time()
+def run(raw_data):
+    prev_time = time.time()
 
-  post = json.loads(raw_data)
-  img_path = post['image']
+    post = json.loads(raw_data)
+    img_path = post['image']
 
-  current_time = time.time()
+    current_time = time.time()
 
-  tensor = process_image(img_path, 160)
-  t = tf.reshape(tensor, [-1, 160, 160, 3])
-  o = model.predict(t, steps=1)  # [0][0]
-  print(o)
-  o = o[0][0]
-  inference_time = datetime.timedelta(seconds=current_time - prev_time)
-  payload = {
-    'time': inference_time.total_seconds(),
-    'prediction': 'burrito' if o > 0.5 else 'tacos',
-    'scores': str(o)
-  }
+    tensor = process_image(img_path, 160)
+    t = tf.reshape(tensor, [-1, 160, 160, 3])
+    o = model.predict(t, steps=1)  # [0][0]
+    print(o)
+    o = o[0][0]
+    inference_time = datetime.timedelta(seconds=current_time - prev_time)
+    payload = {
+        'time': inference_time.total_seconds(),
+        'prediction': 'burrito' if o > 0.5 else 'tacos',
+        'scores': str(o)
+    }
 
-  print('Input ({}), Prediction ({})'.format(post['image'], payload))
+    print('Input ({}), Prediction ({})'.format(post['image'], payload))
 
-  return payload
+    return payload
 
 
 def process_image(path, image_size):
-  # Extract image (from web or path)
-  if path.startswith('http'):
-    response = requests.get(path)
-    img = np.array(Image.open(BytesIO(response.content)))
-  else:
-    img = np.array(Image.open(path))
+    # Extract image (from web or path)
+    if path.startswith('http'):
+        response = requests.get(path)
+        img = np.array(Image.open(BytesIO(response.content)))
+    else:
+        img = np.array(Image.open(path))
 
-  img_tensor = tf.convert_to_tensor(img, dtype=tf.float32)
-  # tf.image.decode_jpeg(img_raw, channels=3)
-  img_final = tf.image.resize(img_tensor, [image_size, image_size]) / 255
-  return img_final
+    img_tensor = tf.convert_to_tensor(img, dtype=tf.float32)
+    # tf.image.decode_jpeg(img_raw, channels=3)
+    img_final = tf.image.resize(img_tensor, [image_size, image_size]) / 255
+    return img_final
 
 
 def info(msg, char="#", width=75):
-  print("")
-  print(char * width)
-  print(char + " %0*s" % ((-1 * width) + 5, msg) + char)
-  print(char * width)
+    print("")
+    print(char * width)
+    print(char + " %0*s" % ((-1 * width) + 5, msg) + char)
+    print(char * width)
 
 
 if __name__ == "__main__":
-  images = {
-    'tacos': 'https://c1.staticflickr.com/5/4022/4401140214_f489c708f0_b.jpg',
-    'burrito': 'https://www.exploreveg.org/files/2015/05/sofritas-burrito.jpeg'
-  }
+    images = {
+        'tacos': 'https://c1.staticflickr.com/5/4022/4401140214_f489c708f0_b.jpg',  # noqa: E501
+        'burrito': 'https://www.exploreveg.org/files/2015/05/sofritas-burrito.jpeg'  # noqa: E501
+    }
 
-  my_model = init()
+    init()
 
-  for k, v in images.items():
-    print('{} => {}'.format(k, v))
+    for k, v in images.items():
+        print('{} => {}'.format(k, v))
 
-  info('Taco Test')
-  taco = json.dumps({'image': images['tacos']})
-  print(taco)
-  run(taco, my_model)
+    info('Taco Test')
+    taco = json.dumps({'image': images['tacos']})
+    print(taco)
+    run(taco)
 
-  info('Burrito Test')
-  burrito = json.dumps({'image': images['burrito']})
-  print(burrito)
-  run(burrito, my_model)
+    info('Burrito Test')
+    burrito = json.dumps({'image': images['burrito']})
+    print(burrito)
+    run(burrito)
diff --git a/pipelines/azurepipeline/code/preprocess/Dockerfile b/pipelines/azurepipeline/code/preprocess/Dockerfile
old mode 100644
new mode 100755
diff --git a/pipelines/azurepipeline/code/preprocess/data.py b/pipelines/azurepipeline/code/preprocess/data.py
old mode 100644
new mode 100755
index b21c13dd1..05165bde7
--- a/pipelines/azurepipeline/code/preprocess/data.py
+++ b/pipelines/azurepipeline/code/preprocess/data.py
@@ -8,111 +8,115 @@
 def check_dir(path):
-  if not os.path.exists(path):
-    os.makedirs(path)
-  return Path(path).resolve(strict=False)
+    if not os.path.exists(path):
+        os.makedirs(path)
+    return Path(path).resolve(strict=False)
 
 
 def download(source, target, force_clear=False):
-  if force_clear and os.path.exists(target):
-    print('Removing {}...'.format(target))
-    shutil.rmtree(target)
+    if force_clear and os.path.exists(target):
+        print('Removing {}...'.format(target))
+        shutil.rmtree(target)
 
-  check_dir(target)
+    check_dir(target)
 
-  targt_file = str(Path(target).joinpath('data.zip'))
-  if os.path.exists(targt_file) and not force_clear:
-    print('data already exists, skipping download')
-    return
+    targt_file = str(Path(target).joinpath('data.zip'))
+    if os.path.exists(targt_file) and not force_clear:
+        print('data already exists, skipping download')
+        return
 
-  if source.startswith('http'):
-    print("Downloading from {} to {}".format(source, target))
-    wget.download(source, targt_file)
-    print("Done!")
-  else:
-    print("Copying from {} to {}".format(source, target))
-    shutil.copyfile(source, targt_file)
+    if source.startswith('http'):
+        print("Downloading from {} to {}".format(source, target))
+        wget.download(source, targt_file)
+        print("Done!")
+    else:
+        print("Copying from {} to {}".format(source, target))
+        shutil.copyfile(source, targt_file)
 
-  print('Unzipping {}'.format(targt_file))
-  zipr = zipfile.ZipFile(targt_file)
-  zipr.extractall(target)
-  zipr.close()
+    print('Unzipping {}'.format(targt_file))
+    zipr = zipfile.ZipFile(targt_file)
+    zipr.extractall(target)
+    zipr.close()
 
 
 def process_image(path, image_size=160):
-  img_raw = tf.io.read_file(path)
-  img_tensor = tf.image.decode_jpeg(img_raw, channels=3)
-  img_final = tf.image.resize(img_tensor, [image_size, image_size]) / 255
-  return img_final
+    img_raw = tf.io.read_file(path)
+    img_tensor = tf.image.decode_jpeg(img_raw, channels=3)
+    img_final = tf.image.resize(img_tensor, [image_size, image_size]) / 255
+    return img_final
 
 
 def walk_images(path, image_size=160):
-  imgs = []
-  print('Scanning {}'.format(path))
-  # find subdirectories in base path
-  # (they should be the labels)
-  labels = []
-  for (_, dirs, _) in os.walk(path):
-    print('Found {}'.format(dirs))
-    labels = dirs
-    break
-
-  for d in labels:
-    tmp_path = os.path.join(path, d)
-    print('Processing {}'.format(tmp_path))
-    # only care about files in directory
-    for item in os.listdir(tmp_path):
-      if not item.lower().endswith('.jpg'):
-        print('skipping {}'.format(item))
-        continue
-
-      image = os.path.join(tmp_path, item)
-      try:
-        img = process_image(image, image_size)
-        assert img.shape[2] == 3, "Invalid channel count"
-        # write out good images
-        imgs.append(image)
-      except img.shape[2] != 3:
-        print('{}\n'.format(image))
-
-  return imgs
+    imgs = []
+    print('Scanning {}'.format(path))
+    # find subdirectories in base path
+    # (they should be the labels)
+    labels = []
+    for (_, dirs, _) in os.walk(path):
+        print('Found {}'.format(dirs))
+        labels = dirs
+        break
+
+    for d in labels:
+        tmp_path = os.path.join(path, d)
+        print('Processing {}'.format(tmp_path))
+        # only care about files in directory
+        for item in os.listdir(tmp_path):
+            if not item.lower().endswith('.jpg'):
+                print('skipping {}'.format(item))
+                continue
+
+            image = os.path.join(tmp_path, item)
+            try:
+                img = process_image(image, image_size)
+                assert img.shape[2] == 3, "Invalid channel count"
+                # write out good images
+                imgs.append(image)
+            except img.shape[2] != 3:
+                print('{}\n'.format(image))
+
+    return imgs
 
 
 if __name__ == "__main__":
-  parser = argparse.ArgumentParser(description='data cleaning for binary image task')
-  parser.add_argument('-b', '--base_path', help='directory to base data', default='../../data')
-  parser.add_argument('-d', '--data', help='directory to training data', default='train')
-  parser.add_argument('-t', '--target', help='target file to hold good data', default='train.txt')
-  parser.add_argument('-i', '--img_size', help='target image size to verify', default=160, type=int)
-  parser.add_argument('-z', '--zipfile', help='source data zip file', default='../../tacodata.zip')
-  parser.add_argument('-f', '--force',
-                      help='force clear all data', default=False, action='store_true')
-  args = parser.parse_args()
-  print(args)
-
-  print('Using TensorFlow v.{}'.format(tf.__version__))
-
-  base_path = Path(args.base_path).resolve(strict=False)
-  print('Base Path: {}'.format(base_path))
-  data_path = base_path.joinpath(args.data).resolve(strict=False)
-  print('Train Path: {}'.format(data_path))
-  target_path = Path(base_path).resolve(strict=False).joinpath(args.target)
-  print('Train File: {}'.format(target_path))
-  zip_path = args.zipfile
-
-  print('Acquiring data...')
-  download('https://aiadvocate.blob.core.windows.net/public/tacodata.zip',
-           str(base_path), args.force)
-
-  if os.path.exists(str(target_path)):
-    print('dataset text file already exists, skipping check')
-  else:
-    print('Testing images...')
-    images = walk_images(str(data_path), args.img_size)
-
-    # save file
-    print('writing dataset to {}'.format(target_path))
-    with open(str(target_path), 'w+') as f:
-      f.write('\n'.join(images))
-
-  # python data.py -z https://aiadvocate.blob.core.windows.net/public/tacodata.zip -t train.txt
+    parser = argparse.ArgumentParser(
+        description='data cleaning for binary image task')
+    parser.add_argument('-b', '--base_path',
+                        help='directory to base data', default='../../data')
+    parser.add_argument(
+        '-d', '--data', help='directory to training data', default='train')
+    parser.add_argument(
+        '-t', '--target', help='target file to hold good data', default='train.txt')  # noqa: E501
+    parser.add_argument(
+        '-i', '--img_size', help='target image size to verify', default=160, type=int)  # noqa: E501
+    parser.add_argument(
+        '-z', '--zipfile', help='source data zip file', default='../../tacodata.zip')  # noqa: E501
+    parser.add_argument('-f', '--force',
+                        help='force clear all data', default=False, action='store_true')  # noqa: E501
+    args = parser.parse_args()
+    print(args)
+
+    print('Using TensorFlow v.{}'.format(tf.__version__))
+
+    base_path = Path(args.base_path).resolve(strict=False)
+    print('Base Path: {}'.format(base_path))
+    data_path = base_path.joinpath(args.data).resolve(strict=False)
+    print('Train Path: {}'.format(data_path))
+    target_path = Path(base_path).resolve(strict=False).joinpath(args.target)
+    print('Train File: {}'.format(target_path))
+    zip_path = args.zipfile
+
+    print('Acquiring data...')
+    download('https://aiadvocate.blob.core.windows.net/public/tacodata.zip',
+             str(base_path), args.force)
+
+    if os.path.exists(str(target_path)):
+        print('dataset text file already exists, skipping check')
+    else:
+        print('Testing images...')
+        images = walk_images(str(data_path), args.img_size)
+
+        # save file
+        print('writing dataset to {}'.format(target_path))
+        with open(str(target_path), 'w+') as f:
+            f.write('\n'.join(images))
diff --git a/pipelines/azurepipeline/code/register/Dockerfile b/pipelines/azurepipeline/code/register/Dockerfile
old mode 100644
new mode 100755
diff --git a/pipelines/azurepipeline/code/register/register.py b/pipelines/azurepipeline/code/register/register.py
old mode 100644
new mode 100755
index 8ef55c723..8de597af4
--- a/pipelines/azurepipeline/code/register/register.py
+++ b/pipelines/azurepipeline/code/register/register.py
@@ -9,94 +9,102 @@
 def info(msg, char="#", width=75):
-  print("")
-  print(char * width)
-  print(char + " %0*s" % ((-1 * width) + 5, msg) + char)
-  print(char * width)
+    print("")
+    print(char * width)
+    print(char + " %0*s" % ((-1 * width) + 5, msg) + char)
+    print(char * width)
 
 
 def get_ws(tenant_id, service_principal_id,
-           service_principal_password, subscription_id, resource_group, workspace):
-  auth_args = {
-    'tenant_id': tenant_id,
-    'service_principal_id': service_principal_id,
-    'service_principal_password': service_principal_password
-  }
-
-  ws_args = {
-    'auth': ServicePrincipalAuthentication(**auth_args),
-    'subscription_id': subscription_id,
-    'resource_group': resource_group
-  }
-  ws = Workspace.get(workspace, **ws_args)
-  return ws
+           service_principal_password, subscription_id, resource_group, workspace):  # noqa: E501
+    auth_args = {
+        'tenant_id': tenant_id,
+        'service_principal_id': service_principal_id,
+        'service_principal_password': service_principal_password
+    }
+
+    ws_args = {
+        'auth': ServicePrincipalAuthentication(**auth_args),
+        'subscription_id': subscription_id,
+        'resource_group': resource_group
+    }
+    ws = Workspace.get(workspace, **ws_args)
+    return ws
 
 
 def run(mdl_path, model_name, ws, tgs):
-  print(ws.get_details())
+    print(ws.get_details())
 
-  print('\nSaving model {} to {}'.format(mdl_path, model_name))
+    print('\nSaving model {} to {}'.format(mdl_path, model_name))
 
-  # Model Path needs to be relative
-  mdl_path = relpath(mdl_path, '.')
+    # Model Path needs to be relative
+    mdl_path = relpath(mdl_path, '.')
 
-  Model.register(ws, model_name=model_name, model_path=mdl_path, tags=tgs)
-  print('Done!')
+    Model.register(ws, model_name=model_name, model_path=mdl_path, tags=tgs)
+    print('Done!')
 
 
 if __name__ == "__main__":
-  # argparse stuff for model path and model name
-  parser = argparse.ArgumentParser(description='sanity check on model')
-  parser.add_argument('-b', '--base_path', help='directory to base folder', default='../../data')
-  parser.add_argument('-m', '--model', help='path to model file', default='/model/latest.h5')
-  parser.add_argument('-n', '--model_name', help='AML Model name', default='tacosandburritos')
-  parser.add_argument('-t', '--tenant_id', help='tenant_id')
-  parser.add_argument('-s', '--service_principal_id', help='service_principal_id')
-  parser.add_argument('-p', '--service_principal_password', help='service_principal_password')
-  parser.add_argument('-u', '--subscription_id', help='subscription_id')
-  parser.add_argument('-r', '--resource_group', help='resource_group')
-  parser.add_argument('-w', '--workspace', help='workspace')
-  args = parser.parse_args()
-
-  print('Azure ML SDK Version: {}'.format(azureml.core.VERSION))
-  args.model = 'model/' + args.model
-  model_path = str(Path(args.base_path).resolve(
-    strict=False).joinpath(args.model).resolve(strict=False))
-  params_path = str(Path(args.base_path).resolve(
-    strict=False).joinpath('params.json').resolve(strict=False))
-  wsrgs = {
-    'tenant_id': args.tenant_id,
-    'service_principal_id': args.service_principal_id,
-    'service_principal_password': args.service_principal_password,
-    'subscription_id': args.subscription_id,
-    'resource_group': args.resource_group,
-    'workspace': args.workspace
-  }
-  rgs = {
-    'mdl_path': model_path,
-    'model_name': args.model_name
-  }
-
-  # printing out args for posterity
-  for i in wsrgs:
-    if i == 'service_principal_password':
-      print('{} => **********'.format(i))
-    else:
-      print('{} => {}'.format(i, wsrgs[i]))
-
-  with(open(str(params_path), 'r')) as f:
-    tags = json.load(f)
-
-  print('\n\nUsing the following tags:')
-  for tag in tags:
-    print('{} => {}'.format(tag, tags[tag]))
-
-  rgs['tgs'] = tags
-
-  workspc = get_ws(**wsrgs)
-  rgs['ws'] = workspc
-  run(**rgs)
-
-  # python register.py --model_path v --model_name c --tenant_id c
-  # --service_principal_id v --service_principal_password v
-  # --subscription_id v --resource_group x --workspace c
+    # argparse stuff for model path and model name
+    parser = argparse.ArgumentParser(description='sanity check on model')
+    parser.add_argument('-b', '--base_path',
+                        help='directory to base folder', default='../../data')
+    parser.add_argument(
+        '-m', '--model', help='path to model file', default='/model/latest.h5')
+    parser.add_argument('-n', '--model_name',
+                        help='AML Model name', default='tacosandburritos')
+    parser.add_argument('-t', '--tenant_id', help='tenant_id')
+    parser.add_argument('-s', '--service_principal_id',
+                        help='service_principal_id')
+    parser.add_argument('-p', '--service_principal_password',
+                        help='service_principal_password')
+    parser.add_argument('-u', '--subscription_id', help='subscription_id')
+    parser.add_argument('-r', '--resource_group', help='resource_group')
+    parser.add_argument('-w', '--workspace', help='workspace')
+    args = parser.parse_args()
+
+    print('Azure ML SDK Version: {}'.format(azureml.core.VERSION))
+    args.model = 'model/' + args.model
+    model_path = str(Path(args.base_path).resolve(
+        strict=False).joinpath(args.model).resolve(strict=False))
+    params_path = str(Path(args.base_path).resolve(
+        strict=False).joinpath('params.json').resolve(strict=False))
+    wsrgs = {
+        'tenant_id': args.tenant_id,
+        'service_principal_id': args.service_principal_id,
+        'service_principal_password': args.service_principal_password,
+        'subscription_id': args.subscription_id,
+        'resource_group': args.resource_group,
+        'workspace': args.workspace
+    }
+    rgs = {
+        'mdl_path': model_path,
+        'model_name': args.model_name
+    }
+
+    # printing out args for posterity
+    for i in wsrgs:
+        if i == 'service_principal_password':
+            print('{} => **********'.format(i))
+        else:
+            print('{} => {}'.format(i, wsrgs[i]))
+
+    for i in rgs:
+        print('{} => {}'.format(i, rgs[i]))
+
+    with(open(str(params_path), 'r')) as f:
+        tags = json.load(f)
+
+    print('\n\nUsing the following tags:')
+    for tag in tags:
+        print('{} => {}'.format(tag, tags[tag]))
+
+    rgs['tgs'] = tags
+
+    workspc = get_ws(**wsrgs)
+    rgs['ws'] = workspc
+    run(**rgs)
+
+    # python register.py --model_path v --model_name c --tenant_id c
+    # --service_principal_id v --service_principal_password v
+    # --subscription_id v --resource_group x --workspace c
diff --git a/pipelines/azurepipeline/code/training/Dockerfile b/pipelines/azurepipeline/code/training/Dockerfile
old mode 100644
new mode 100755
diff --git a/pipelines/azurepipeline/code/training/train.py b/pipelines/azurepipeline/code/training/train.py
old mode 100644
new mode 100755
index 885608268..a67f23f63
--- a/pipelines/azurepipeline/code/training/train.py
+++ b/pipelines/azurepipeline/code/training/train.py
@@ -13,188 +13,199 @@
 def info(msg, char="#", width=75):
-  print("")
-  print(char * width)
-  print(char + " %0*s" % ((-1 * width) + 5, msg) + char)
-  print(char * width)
+    print("")
+    print(char * width)
+    print(char + " %0*s" % ((-1 * width) + 5, msg) + char)
+    print(char * width)
 
 
 def check_dir(path):
-  if not os.path.exists(path):
-    os.makedirs(path)
-  return Path(path).resolve(strict=False)
+    if not os.path.exists(path):
+        os.makedirs(path)
+    return Path(path).resolve(strict=False)
 
 
 def process_image(path, label, img_size):
-  img_raw = tf.io.read_file(path)
-  img_tensor = tf.image.decode_jpeg(img_raw, channels=3)
-  img_final = tf.image.resize(img_tensor, [img_size, img_size]) / 255
-  return img_final, label
+    img_raw = tf.io.read_file(path)
+    img_tensor = tf.image.decode_jpeg(img_raw, channels=3)
+    img_final = tf.image.resize(img_tensor, [img_size, img_size]) / 255
+    return img_final, label
 
 
 def load_dataset(base_path, dset, split=None):
-  # normalize splits
-  if split is None:
-    split = [8, 1, 1]
-  splits = np.array(split) / np.sum(np.array(split))
-
-  # find labels - parent folder names
-  labels = {}
-  for (_, dirs, _) in os.walk(base_path):
-    print('found {}'.format(dirs))
-    labels = {k: v for (v, k) in enumerate(dirs)}
-    print('using {}'.format(labels))
-    break
+    # normalize splits
+    if split is None:
+        split = [8, 1, 1]
+    splits = np.array(split) / np.sum(np.array(split))
+
+    # find labels - parent folder names
+    labels = {}
+    for (_, dirs, _) in os.walk(base_path):
+        print('found {}'.format(dirs))
+        labels = {k: v for (v, k) in enumerate(dirs)}
+        print('using {}'.format(labels))
+        break
 
-  # load all files along with idx label
-  print('loading dataset from {}'.format(dset))
-  with open(dset, 'r') as d:
-    data = [(str(Path(line.strip()).absolute()),
-             labels[Path(line.strip()).parent.name]) for line in d.readlines()]
+    # load all files along with idx label
+    print('loading dataset from {}'.format(dset))
+    with open(dset, 'r') as d:
+        data = [(str(Path(line.strip()).absolute()),
+                 labels[Path(line.strip()).parent.name]) for line in d.readlines()]  # noqa: E501
 
-  print('dataset size: {}\nsuffling data...'.format(len(data)))
+    print('dataset size: {}\nsuffling data...'.format(len(data)))
 
-  # shuffle data
-  shuffle(data)
+    # shuffle data
+    shuffle(data)
 
-  print('splitting data...')
-  # split data
-  train_idx = int(len(data) * splits[0])
+    print('splitting data...')
+    # split data
+    train_idx = int(len(data) * splits[0])
 
-  return data[:train_idx]
+    return data[:train_idx]
 
 
 # @print_info
 def run(
-    dpath,
-    img_size=160,
-    epochs=10,
-    batch_size=32,
-    learning_rate=0.0001,
-    output='model',
-    dset=None):
-  img_shape = (img_size, img_size, 3)
+        dpath,
+        img_size=160,
+        epochs=10,
+        batch_size=32,
+        learning_rate=0.0001,
+        output='model',
+        dset=None):
+    img_shape = (img_size, img_size, 3)
 
-  info('Loading Data Set')
-  # load dataset
-  train = load_dataset(dpath, dset)
+    info('Loading Data Set')
+    # load dataset
+    train = load_dataset(dpath, dset)
 
-  # training data
-  train_data, train_labels = zip(*train)
-  train_ds = Dataset.zip((Dataset.from_tensor_slices(list(train_data)),
-                          Dataset.from_tensor_slices(list(train_labels)),
-                          Dataset.from_tensor_slices([img_size]*len(train_data))))
+    # training data
+    train_data, train_labels = zip(*train)
+    train_ds = Dataset.zip((Dataset.from_tensor_slices(list(train_data)),
+                            Dataset.from_tensor_slices(list(train_labels)),
+                            Dataset.from_tensor_slices([img_size]*len(train_data))))
 
-  train_ds = train_ds.map(map_func=process_image,
-                          num_parallel_calls=5)
+    print(train_ds)
+    train_ds = train_ds.map(map_func=process_image,
+                            num_parallel_calls=5)
 
-  train_ds = train_ds.apply(tf.data.experimental.ignore_errors())
+    train_ds = train_ds.apply(tf.data.experimental.ignore_errors())
 
-  train_ds = train_ds.batch(batch_size)
-  train_ds = train_ds.prefetch(buffer_size=5)
-  train_ds = train_ds.repeat()
+    train_ds = train_ds.batch(batch_size)
+    train_ds = train_ds.prefetch(buffer_size=5)
+    train_ds = train_ds.repeat()
 
-  # model
-  info('Creating Model')
-  base_model = tf.keras.applications.MobileNetV2(input_shape=img_shape,
-                                                 include_top=False,
-                                                 weights='imagenet')
-  base_model.trainable = True
+    # model
+    info('Creating Model')
+    base_model = tf.keras.applications.MobileNetV2(input_shape=img_shape,
+                                                   include_top=False,
+                                                   weights='imagenet')
+    base_model.trainable = True
 
-  model = tf.keras.Sequential([
-    base_model,
-    tf.keras.layers.GlobalAveragePooling2D(),
-    tf.keras.layers.Dense(1, activation='sigmoid')
-  ])
+    model = tf.keras.Sequential([
+        base_model,
+        tf.keras.layers.GlobalAveragePooling2D(),
+        tf.keras.layers.Dense(1, activation='sigmoid')
+    ])
 
-  model.compile(optimizer=tf.keras.optimizers.Adam(lr=learning_rate),
-                loss='binary_crossentropy',
-                metrics=['accuracy'])
+    model.compile(optimizer=tf.keras.optimizers.Adam(lr=learning_rate),
+                  loss='binary_crossentropy',
+                  metrics=['accuracy'])
 
-  model.summary()
+    model.summary()
 
-  # training
-  info('Training')
-  steps_per_epoch = math.ceil(len(train) / batch_size)
-  model.fit(train_ds, epochs=epochs, steps_per_epoch=steps_per_epoch)
+    # training
+    info('Training')
+    steps_per_epoch = math.ceil(len(train) / batch_size)
 
-  # save model
-  info('Saving Model')
+    model.fit(train_ds, epochs=epochs, steps_per_epoch=steps_per_epoch)
 
-  # check existence of base model folder
-  output = check_dir(output)
+    # save model
+    info('Saving Model')
 
-  print('Serializing into saved_model format')
-  tf.saved_model.save(model, str(output))
-  print('Done!')
+    # check existence of base model folder
+    output = check_dir(output)
 
-  # add time prefix folder
-  file_output = str(Path(output).joinpath('latest.h5'))
-  print('Serializing h5 model to:\n{}'.format(file_output))
-  model.save(file_output)
+    print('Serializing into saved_model format')
+    tf.saved_model.save(model, str(output))
+    print('Done!')
 
-  return generate_hash(file_output, 'kf_pipeline')
+    # add time prefix folder
+    file_output = str(Path(output).joinpath('latest.h5'))
+    print('Serializing h5 model to:\n{}'.format(file_output))
+    model.save(file_output)
+
+    return generate_hash(file_output, 'kf_pipeline')
 
 
 def generate_hash(dfile, key):
-  print('Generating hash for {}'.format(dfile))
-  m = hmac.new(str.encode(key), digestmod=hashlib.sha256)
-  BUF_SIZE = 65536
-  with open(str(dfile), 'rb') as myfile:
-    while True:
-      data = myfile.read(BUF_SIZE)
-      if not data:
-        break
-      m.update(data)
+    print('Generating hash for {}'.format(dfile))
+    m = hmac.new(str.encode(key), digestmod=hashlib.sha256)
+    BUF_SIZE = 65536
+    with open(str(dfile), 'rb') as myfile:
+        while True:
+            data = myfile.read(BUF_SIZE)
+            if not data:
+                break
+            m.update(data)
 
-  return m.hexdigest()
+    return m.hexdigest()
 
 
 if __name__ == "__main__":
-  parser = argparse.ArgumentParser(description='transfer learning for binary image task')
-  parser.add_argument('-s', '--base_path', help='directory to base data', default='../../data')
-  parser.add_argument('-d', '--data', help='directory to training and test data', default='train')
-  parser.add_argument('-e', '--epochs', help='number of epochs', default=10, type=int)
-  parser.add_argument('-b', '--batch', help='batch size', default=32, type=int)
-  parser.add_argument('-i', '--image_size', help='image size', default=160, type=int)
-  parser.add_argument('-l', '--lr', help='learning rate', default=0.0001, type=float)
-  parser.add_argument('-o', '--outputs', help='output directory', default='model')
-  parser.add_argument('-f', '--dataset', help='cleaned data listing')
-  args = parser.parse_args()
-
-  info('Using TensorFlow v.{}'.format(tf.__version__))
-
-  data_path = Path(args.base_path).joinpath(args.data).resolve(strict=False)
-  target_path = Path(args.base_path).resolve(strict=False).joinpath(args.outputs)
-  dataset = Path(args.base_path).joinpath(args.dataset)
-  image_size = args.image_size
-
-  params = Path(args.base_path).joinpath('params.json')
-
-  args = {
-    "dpath": str(data_path),
-    "img_size": image_size,
-    "epochs": args.epochs,
-    "batch_size": args.batch,
-    "learning_rate": args.lr,
-    "output": str(target_path),
-    "dset": str(dataset)
-  }
-
-  dataset_signature = generate_hash(dataset, 'kf_pipeline')
-  # printing out args for posterity
-  for i in args:
-    print('{} => {}'.format(i, args[i]))
-
-  model_signature = run(**args)
-
-  args['dataset_signature'] = dataset_signature.upper()
-  args['model_signature'] = model_signature.upper()
-  args['model_type'] = 'tfv2-MobileNetV2'
-  print('Writing out params...', end='')
-  with open(str(params), 'w') as f:
-    json.dump(args, f)
-
-  print(' Saved to {}'.format(str(params)))
-
-  # python train.py -d train -e 3 -b 32 -l 0.0001 -o model -f train.txt
+    parser = argparse.ArgumentParser(
+        description='transfer learning for binary image task')
+    parser.add_argument('-s', '--base_path',
+                        help='directory to base data', default='../../data')
+    parser.add_argument(
+        '-d', '--data', help='directory to training and test data', default='train')  # noqa: E501
+    parser.add_argument(
+        '-e', '--epochs', help='number of epochs', default=10, type=int)
+    parser.add_argument('-b', '--batch', help='batch size',
+                        default=32, type=int)
+    parser.add_argument('-i', '--image_size',
+                        help='image size', default=160, type=int)
+    parser.add_argument('-l', '--lr', help='learning rate',
+                        default=0.0001, type=float)
+    parser.add_argument('-o', '--outputs',
+                        help='output directory', default='model')
+    parser.add_argument('-f', '--dataset', help='cleaned data listing')
+    args = parser.parse_args()
+
+    info('Using TensorFlow v.{}'.format(tf.__version__))
+
+    data_path = Path(args.base_path).joinpath(args.data).resolve(strict=False)
+    target_path = Path(args.base_path).resolve(
+        strict=False).joinpath(args.outputs)
+    dataset = Path(args.base_path).joinpath(args.dataset)
+    image_size = args.image_size
+
+    params = Path(args.base_path).joinpath('params.json')
+
+    args = {
+        "dpath": str(data_path),
+        "img_size": image_size,
+        "epochs": args.epochs,
+        "batch_size": args.batch,
+        "learning_rate": args.lr,
+        "output": str(target_path),
+        "dset": str(dataset)
+    }
+
+    dataset_signature = generate_hash(dataset, 'kf_pipeline')
+    # printing out args for posterity
+    for i in args:
+        print('{} => {}'.format(i, args[i]))
+
+    model_signature = run(**args)
+
+    args['dataset_signature'] = dataset_signature.upper()
+    args['model_signature'] = model_signature.upper()
+    args['model_type'] = 'tfv2-MobileNetV2'
+    print('Writing out params...', end='')
+    with open(str(params), 'w') as f:
+        json.dump(args, f)
+
+    print(' Saved to {}'.format(str(params)))
+
+    # python train.py -d train -e 3 -b 32 -l 0.0001 -o model -f train.txt