Fixes. End-to-End Pipeline Example on Azure (kubeflow#788)
* Fixes

* Fixes
eedorenko authored Apr 30, 2020
1 parent b8aa657 commit 0609ad4
Showing 8 changed files with 397 additions and 373 deletions.
Empty file modified: pipelines/azurepipeline/code/deploy/Dockerfile (mode 100644 → 100755)
pipelines/azurepipeline/code/deploy/score.py: 58 additions & 57 deletions (mode 100644 → 100755)
@@ -11,83 +11,84 @@


 def init():
-  if Model.get_model_path('tacosandburritos'):
-    model_path = Model.get_model_path('tacosandburritos')
-  else:
-    model_path = '/model/latest.h5'
+    global model
+    if Model.get_model_path('tacosandburritos'):
+        model_path = Model.get_model_path('tacosandburritos')
+    else:
+        model_path = '/model/latest.h5'

-  print('Attempting to load model')
-  model = tf.keras.models.load_model(model_path)
-  model.summary()
-  print('Done!')
+    print('Attempting to load model')
+    model = tf.keras.models.load_model(model_path)
+    model.summary()
+    print('Done!')

-  print('Initialized model "{}" at {}'.format(model_path, datetime.datetime.now()))
-  return model
+    print('Initialized model "{}" at {}'.format(
+        model_path, datetime.datetime.now()))


-def run(raw_data, model):
-  prev_time = time.time()
+def run(raw_data):
+    prev_time = time.time()

-  post = json.loads(raw_data)
-  img_path = post['image']
+    post = json.loads(raw_data)
+    img_path = post['image']

-  current_time = time.time()
+    current_time = time.time()

-  tensor = process_image(img_path, 160)
-  t = tf.reshape(tensor, [-1, 160, 160, 3])
-  o = model.predict(t, steps=1)  # [0][0]
-  print(o)
-  o = o[0][0]
-  inference_time = datetime.timedelta(seconds=current_time - prev_time)
-  payload = {
-    'time': inference_time.total_seconds(),
-    'prediction': 'burrito' if o > 0.5 else 'tacos',
-    'scores': str(o)
-  }
+    tensor = process_image(img_path, 160)
+    t = tf.reshape(tensor, [-1, 160, 160, 3])
+    o = model.predict(t, steps=1)  # [0][0]
+    print(o)
+    o = o[0][0]
+    inference_time = datetime.timedelta(seconds=current_time - prev_time)
+    payload = {
+        'time': inference_time.total_seconds(),
+        'prediction': 'burrito' if o > 0.5 else 'tacos',
+        'scores': str(o)
+    }

-  print('Input ({}), Prediction ({})'.format(post['image'], payload))
+    print('Input ({}), Prediction ({})'.format(post['image'], payload))

-  return payload
+    return payload


 def process_image(path, image_size):
-  # Extract image (from web or path)
-  if path.startswith('http'):
-    response = requests.get(path)
-    img = np.array(Image.open(BytesIO(response.content)))
-  else:
-    img = np.array(Image.open(path))
+    # Extract image (from web or path)
+    if path.startswith('http'):
+        response = requests.get(path)
+        img = np.array(Image.open(BytesIO(response.content)))
+    else:
+        img = np.array(Image.open(path))

-  img_tensor = tf.convert_to_tensor(img, dtype=tf.float32)
-  # tf.image.decode_jpeg(img_raw, channels=3)
-  img_final = tf.image.resize(img_tensor, [image_size, image_size]) / 255
-  return img_final
+    img_tensor = tf.convert_to_tensor(img, dtype=tf.float32)
+    # tf.image.decode_jpeg(img_raw, channels=3)
+    img_final = tf.image.resize(img_tensor, [image_size, image_size]) / 255
+    return img_final


 def info(msg, char="#", width=75):
-  print("")
-  print(char * width)
-  print(char + " %0*s" % ((-1 * width) + 5, msg) + char)
-  print(char * width)
+    print("")
+    print(char * width)
+    print(char + " %0*s" % ((-1 * width) + 5, msg) + char)
+    print(char * width)


 if __name__ == "__main__":
-  images = {
-    'tacos': 'https://c1.staticflickr.com/5/4022/4401140214_f489c708f0_b.jpg',
-    'burrito': 'https://www.exploreveg.org/files/2015/05/sofritas-burrito.jpeg'
-  }
+    images = {
+        'tacos': 'https://c1.staticflickr.com/5/4022/4401140214_f489c708f0_b.jpg',  # noqa: E501
+        'burrito': 'https://www.exploreveg.org/files/2015/05/sofritas-burrito.jpeg'  # noqa: E501
+    }

-  my_model = init()
+    init()

-  for k, v in images.items():
-    print('{} => {}'.format(k, v))
+    for k, v in images.items():
+        print('{} => {}'.format(k, v))

-  info('Taco Test')
-  taco = json.dumps({'image': images['tacos']})
-  print(taco)
-  run(taco, my_model)
+    info('Taco Test')
+    taco = json.dumps({'image': images['tacos']})
+    print(taco)
+    run(taco)

-  info('Burrito Test')
-  burrito = json.dumps({'image': images['burrito']})
-  print(burrito)
-  run(burrito, my_model)
+    info('Burrito Test')
+    burrito = json.dumps({'image': images['burrito']})
+    print(burrito)
+    run(burrito)
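With this change score.py follows the usual scoring-script contract: init() loads the model into a module-level global once, and run(raw_data) serves each request from that global instead of receiving the model as an argument. Below is a minimal sketch of exercising such a service over HTTP once it is deployed; the scoring URI and the running endpoint are assumptions for illustration, not part of this commit:

import json

import requests

# Placeholder endpoint; substitute the scoring URI of the deployed service.
scoring_uri = 'http://localhost:5001/score'
sample = json.dumps({'image': 'https://c1.staticflickr.com/5/4022/4401140214_f489c708f0_b.jpg'})

# run() parses the raw JSON body, expects an 'image' key, and returns
# 'time', 'prediction' and 'scores' in its payload.
response = requests.post(scoring_uri, data=sample,
                         headers={'Content-Type': 'application/json'})
print(response.status_code, response.json())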
Empty file modified: pipelines/azurepipeline/code/preprocess/Dockerfile (mode 100644 → 100755)
pipelines/azurepipeline/code/preprocess/data.py: 96 additions & 92 deletions (mode 100644 → 100755)
@@ -8,111 +8,115 @@


 def check_dir(path):
-  if not os.path.exists(path):
-    os.makedirs(path)
-  return Path(path).resolve(strict=False)
+    if not os.path.exists(path):
+        os.makedirs(path)
+    return Path(path).resolve(strict=False)


 def download(source, target, force_clear=False):
-  if force_clear and os.path.exists(target):
-    print('Removing {}...'.format(target))
-    shutil.rmtree(target)
+    if force_clear and os.path.exists(target):
+        print('Removing {}...'.format(target))
+        shutil.rmtree(target)

-  check_dir(target)
+    check_dir(target)

-  targt_file = str(Path(target).joinpath('data.zip'))
-  if os.path.exists(targt_file) and not force_clear:
-    print('data already exists, skipping download')
-    return
+    targt_file = str(Path(target).joinpath('data.zip'))
+    if os.path.exists(targt_file) and not force_clear:
+        print('data already exists, skipping download')
+        return

-  if source.startswith('http'):
-    print("Downloading from {} to {}".format(source, target))
-    wget.download(source, targt_file)
-    print("Done!")
-  else:
-    print("Copying from {} to {}".format(source, target))
-    shutil.copyfile(source, targt_file)
+    if source.startswith('http'):
+        print("Downloading from {} to {}".format(source, target))
+        wget.download(source, targt_file)
+        print("Done!")
+    else:
+        print("Copying from {} to {}".format(source, target))
+        shutil.copyfile(source, targt_file)

-  print('Unzipping {}'.format(targt_file))
-  zipr = zipfile.ZipFile(targt_file)
-  zipr.extractall(target)
-  zipr.close()
+    print('Unzipping {}'.format(targt_file))
+    zipr = zipfile.ZipFile(targt_file)
+    zipr.extractall(target)
+    zipr.close()


 def process_image(path, image_size=160):
-  img_raw = tf.io.read_file(path)
-  img_tensor = tf.image.decode_jpeg(img_raw, channels=3)
-  img_final = tf.image.resize(img_tensor, [image_size, image_size]) / 255
-  return img_final
+    img_raw = tf.io.read_file(path)
+    img_tensor = tf.image.decode_jpeg(img_raw, channels=3)
+    img_final = tf.image.resize(img_tensor, [image_size, image_size]) / 255
+    return img_final


 def walk_images(path, image_size=160):
-  imgs = []
-  print('Scanning {}'.format(path))
-  # find subdirectories in base path
-  # (they should be the labels)
-  labels = []
-  for (_, dirs, _) in os.walk(path):
-    print('Found {}'.format(dirs))
-    labels = dirs
-    break
-
-  for d in labels:
-    tmp_path = os.path.join(path, d)
-    print('Processing {}'.format(tmp_path))
-    # only care about files in directory
-    for item in os.listdir(tmp_path):
-      if not item.lower().endswith('.jpg'):
-        print('skipping {}'.format(item))
-        continue
-
-      image = os.path.join(tmp_path, item)
-      try:
-        img = process_image(image, image_size)
-        assert img.shape[2] == 3, "Invalid channel count"
-        # write out good images
-        imgs.append(image)
-      except img.shape[2] != 3:
-        print('{}\n'.format(image))
-
-  return imgs
+    imgs = []
+    print('Scanning {}'.format(path))
+    # find subdirectories in base path
+    # (they should be the labels)
+    labels = []
+    for (_, dirs, _) in os.walk(path):
+        print('Found {}'.format(dirs))
+        labels = dirs
+        break
+
+    for d in labels:
+        tmp_path = os.path.join(path, d)
+        print('Processing {}'.format(tmp_path))
+        # only care about files in directory
+        for item in os.listdir(tmp_path):
+            if not item.lower().endswith('.jpg'):
+                print('skipping {}'.format(item))
+                continue
+
+            image = os.path.join(tmp_path, item)
+            try:
+                img = process_image(image, image_size)
+                assert img.shape[2] == 3, "Invalid channel count"
+                # write out good images
+                imgs.append(image)
+            except img.shape[2] != 3:
+                print('{}\n'.format(image))
+
+    return imgs


 if __name__ == "__main__":
-  parser = argparse.ArgumentParser(description='data cleaning for binary image task')
-  parser.add_argument('-b', '--base_path', help='directory to base data', default='../../data')
-  parser.add_argument('-d', '--data', help='directory to training data', default='train')
-  parser.add_argument('-t', '--target', help='target file to hold good data', default='train.txt')
-  parser.add_argument('-i', '--img_size', help='target image size to verify', default=160, type=int)
-  parser.add_argument('-z', '--zipfile', help='source data zip file', default='../../tacodata.zip')
-  parser.add_argument('-f', '--force',
-                      help='force clear all data', default=False, action='store_true')
-  args = parser.parse_args()
-  print(args)
-
-  print('Using TensorFlow v.{}'.format(tf.__version__))
-
-  base_path = Path(args.base_path).resolve(strict=False)
-  print('Base Path: {}'.format(base_path))
-  data_path = base_path.joinpath(args.data).resolve(strict=False)
-  print('Train Path: {}'.format(data_path))
-  target_path = Path(base_path).resolve(strict=False).joinpath(args.target)
-  print('Train File: {}'.format(target_path))
-  zip_path = args.zipfile
-
-  print('Acquiring data...')
-  download('https://aiadvocate.blob.core.windows.net/public/tacodata.zip',
-           str(base_path), args.force)
-
-  if os.path.exists(str(target_path)):
-    print('dataset text file already exists, skipping check')
-  else:
-    print('Testing images...')
-    images = walk_images(str(data_path), args.img_size)
-
-    # save file
-    print('writing dataset to {}'.format(target_path))
-    with open(str(target_path), 'w+') as f:
-      f.write('\n'.join(images))
-
-  # python data.py -z https://aiadvocate.blob.core.windows.net/public/tacodata.zip -t train.txt
+    parser = argparse.ArgumentParser(
+        description='data cleaning for binary image task')
+    parser.add_argument('-b', '--base_path',
+                        help='directory to base data', default='../../data')
+    parser.add_argument(
+        '-d', '--data', help='directory to training data', default='train')
+    parser.add_argument(
+        '-t', '--target', help='target file to hold good data', default='train.txt')  # noqa: E501
+    parser.add_argument(
+        '-i', '--img_size', help='target image size to verify', default=160, type=int)  # noqa: E501
+    parser.add_argument(
+        '-z', '--zipfile', help='source data zip file', default='../../tacodata.zip')  # noqa: E501
+    parser.add_argument('-f', '--force',
+                        help='force clear all data', default=False, action='store_true')  # noqa: E501
+    args = parser.parse_args()
+    print(args)
+
+    print('Using TensorFlow v.{}'.format(tf.__version__))
+
+    base_path = Path(args.base_path).resolve(strict=False)
+    print('Base Path: {}'.format(base_path))
+    data_path = base_path.joinpath(args.data).resolve(strict=False)
+    print('Train Path: {}'.format(data_path))
+    target_path = Path(base_path).resolve(strict=False).joinpath(args.target)
+    print('Train File: {}'.format(target_path))
+    zip_path = args.zipfile
+
+    print('Acquiring data...')
+    download('https://aiadvocate.blob.core.windows.net/public/tacodata.zip',
+             str(base_path), args.force)
+
+    if os.path.exists(str(target_path)):
+        print('dataset text file already exists, skipping check')
+    else:
+        print('Testing images...')
+        images = walk_images(str(data_path), args.img_size)
+
+        # save file
+        print('writing dataset to {}'.format(target_path))
+        with open(str(target_path), 'w+') as f:
+            f.write('\n'.join(images))
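Both the old and the new version of walk_images() keep the clause "except img.shape[2] != 3:"; Python's except expects an exception type, so a boolean comparison there never acts as a working handler. Below is a minimal sketch of the same validity check written with explicit exception classes, assuming the intent is simply to skip files that fail to decode; is_valid_jpeg is a hypothetical helper, not part of this commit:

import tensorflow as tf


def is_valid_jpeg(path, image_size=160):
    # Returns True when the file reads and decodes as a 3-channel JPEG
    # that can be resized to the target size, False otherwise.
    try:
        img_raw = tf.io.read_file(path)
        img = tf.image.decode_jpeg(img_raw, channels=3)
        tf.image.resize(img, [image_size, image_size])
        return True
    except (tf.errors.NotFoundError, tf.errors.InvalidArgumentError):
        # unreadable file, or content that is not decodable JPEG data
        return False

The loop in walk_images() could then append image only when is_valid_jpeg(image, image_size) returns True.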
Empty file modified: pipelines/azurepipeline/code/register/Dockerfile (mode 100644 → 100755)