diff --git a/embedding-calculator/Dockerfile b/embedding-calculator/Dockerfile index 7af6640d79..9c3dc8ff1f 100644 --- a/embedding-calculator/Dockerfile +++ b/embedding-calculator/Dockerfile @@ -29,7 +29,7 @@ ARG GPU_IDX=-1 ENV GPU_IDX=$GPU_IDX INTEL_OPTIMIZATION=$INTEL_OPTIMIZATION ARG FACE_DETECTION_PLUGIN="facenet.FaceDetector" ARG CALCULATION_PLUGIN="facenet.Calculator" -ARG EXTRA_PLUGINS="rude_carnie.AgeDetector,rude_carnie.GenderDetector" +ARG EXTRA_PLUGINS="" ENV FACE_DETECTION_PLUGIN=$FACE_DETECTION_PLUGIN CALCULATION_PLUGIN=$CALCULATION_PLUGIN \ EXTRA_PLUGINS=$EXTRA_PLUGINS COPY src src diff --git a/embedding-calculator/src/services/facescan/plugins/facenet/facenet.py b/embedding-calculator/src/services/facescan/plugins/facenet/facenet.py index 8c9b56785c..dce7321978 100644 --- a/embedding-calculator/src/services/facescan/plugins/facenet/facenet.py +++ b/embedding-calculator/src/services/facescan/plugins/facenet/facenet.py @@ -18,7 +18,10 @@ from typing import List import numpy as np +import tensorflow as tf +from tensorflow.python.platform import gfile from cached_property import cached_property +from facenet.src.align import detect_face from src.constants import ENV from src.services.dto.bounding_box import BoundingBoxDTO @@ -52,9 +55,6 @@ class FaceDetector(base.BaseFaceDetector): @cached_property def _face_detection_nets(self): - import tensorflow as tf - from facenet.src.align import detect_face - with tf.Graph().as_default(): sess = tf.Session() return _FaceDetectionNets(*detect_face.create_mtcnn(sess, None)) @@ -63,8 +63,6 @@ def crop_face(self, img: Array3D, box: BoundingBoxDTO) -> Array3D: return squish_img(crop_img(img, box), (self.IMAGE_SIZE, self.IMAGE_SIZE)) def find_faces(self, img: Array3D, det_prob_threshold: float = None) -> List[BoundingBoxDTO]: - from facenet.src.align import detect_face - if det_prob_threshold is None: det_prob_threshold = self.det_prob_threshold assert 0 <= det_prob_threshold <= 1 @@ -119,8 +117,6 @@ def calc_embedding(self, face_img: Array3D) -> Array3D: @cached_property def _embedding_calculator(self): - import tensorflow as tf - from tensorflow.python.platform import gfile with tf.Graph().as_default() as graph: graph_def = tf.GraphDef() with gfile.FastGFile(self.ml_model_file, 'rb') as f: diff --git a/embedding-calculator/src/services/facescan/plugins/rude_carnie/__init__.py b/embedding-calculator/src/services/facescan/plugins/rude_carnie/__init__.py deleted file mode 100644 index 3483f084f5..0000000000 --- a/embedding-calculator/src/services/facescan/plugins/rude_carnie/__init__.py +++ /dev/null @@ -1,15 +0,0 @@ -# Copyright (c) 2020 the original author or authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express -# or implied. See the License for the specific language governing -# permissions and limitations under the License. 
- -requirements = ('tensorflow~=1.15.4',) diff --git a/embedding-calculator/src/services/facescan/plugins/rude_carnie/rude_carnie.py b/embedding-calculator/src/services/facescan/plugins/rude_carnie/rude_carnie.py deleted file mode 100644 index c77e59a30f..0000000000 --- a/embedding-calculator/src/services/facescan/plugins/rude_carnie/rude_carnie.py +++ /dev/null @@ -1,86 +0,0 @@ -# Copyright (c) 2020 the original author or authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express -# or implied. See the License for the specific language governing -# permissions and limitations under the License. - -from functools import lru_cache -from typing import Tuple, Union - -import numpy as np -import tensorflow as tf - -from src.services.imgtools.types import Array3D -from src.services.facescan.plugins import base, managers -from src.services.dto import plugin_result -from srcext.rude_carnie.model import inception_v3, get_checkpoint - - -def prewhiten(img): - """ Normalize image.""" - mean = np.mean(img) - std = np.std(img) - std_adj = np.maximum(std, 1.0 / np.sqrt(img.size)) - y = np.multiply(np.subtract(img, mean), 1 / std_adj) - return y - - -@lru_cache(maxsize=2) -def _get_rude_carnie_model(labels: Tuple, model_dir: str): - - IMAGE_SIZE = managers.plugin_manager.detector.IMAGE_SIZE - - g = tf.Graph() - with g.as_default(): - sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) - - images = tf.placeholder(tf.float32, [None, IMAGE_SIZE, IMAGE_SIZE, 3]) - logits = inception_v3(len(labels), images, 1, False) - tf.global_variables_initializer() - - model_checkpoint_path, global_step = get_checkpoint(model_dir, None, 'checkpoint') - - saver = tf.train.Saver() - saver.restore(sess, model_checkpoint_path) - softmax_output = tf.nn.softmax(logits) - - def get_value(img: Array3D) -> Tuple[Union[str, Tuple], float]: - img = np.expand_dims(prewhiten(img), 0) - output = sess.run(softmax_output, feed_dict={images:img})[0] - best_i = int(np.argmax(output)) - return labels[best_i], output[best_i] - return get_value - - -class AgeDetector(base.BasePlugin): - slug = 'age' - LABELS = ((0, 2), (4, 6), (8, 12), (15, 20), (25, 32), (38, 43), (48, 53), (60, 100)) - ml_models = ( - ('22801', '1PxK72O-NROEz8pUGDDFRDYF4AABbvWiC'), - ) - - def __call__(self, face_img: Array3D): - model = _get_rude_carnie_model(self.LABELS, self.ml_model.path) - value, probability = model(face_img) - return plugin_result.AgeDTO(age=value, age_probability=probability) - - -class GenderDetector(base.BasePlugin): - slug = 'gender' - LABELS = ('male', 'female') - ml_models = ( - ('21936', '1j9B76U3b4_F9e8-OKlNdOBQKa2ziGe_-'), - ) - - def __call__(self, face_img: Array3D): - model = _get_rude_carnie_model(self.LABELS, self.ml_model.path) - value, probability = model(face_img) - return plugin_result.GenderDTO(gender=value, gender_probability=probability) diff --git a/embedding-calculator/srcext/rude_carnie/README.md b/embedding-calculator/srcext/rude_carnie/README.md deleted file mode 100644 index 4317b90887..0000000000 --- a/embedding-calculator/srcext/rude_carnie/README.md +++ /dev/null @@ -1,227 +0,0 @@ -Rude Carnie: Age and Gender Deep Learning 
with TensorFlow -========================================================== - -## Goal - -Do face detection and age and gender classification on pictures - -### Name - -http://www.someecards.com/news/getting-old/howoldnet-takes-your-picture-and-uses-algorithms-to-guess-your-age-like-a-rude-carnie/ - -### Currently Supported Models - - - _Gil Levi and Tal Hassner, Age and Gender Classification Using Convolutional Neural Networks, IEEE Workshop on Analysis and Modeling of Faces and Gestures (AMFG), at the IEEE Conf. on Computer Vision and Pattern Recognition (CVPR), Boston, June 2015_ - - - http://www.openu.ac.il/home/hassner/projects/cnn_agegender/ - - https://github.com/GilLevi/AgeGenderDeepLearning - - - Inception v3 with fine-tuning - - This will start with an inception v3 checkpoint, and fine-tune for either age or gender detection - -### Running - -There are several ways to use a pre-existing checkpoint to do age or gender classification. By default, the code will simply assume that the image you provided has a face in it, and will run that image through a multi-pass classification using the corners and center. - - The --class_type parameter controls which task, and the --model_dir controls which checkpoint to restore. There are advanced parameters for the checkpoint basename (--checkpoint) and the requested step number if there are multiple checkpoints in the directory (--requested_step) - -Here is a run using Age classification on the latest checkpoint in a directory using 12-look (all corners + center + resized, along with flipped versions) averaging: - -``` -$ python guess.py --model_dir /home/dpressel/dev/work/AgeGenderDeepLearning/Folds/tf/age_test_fold_is_1/run-20854 --filename /home/dpressel/Downloads/portraits/prince.jpg -``` - -You can also tell it to do a single image classification without the corners and center crop. Here is a run using Age classification on the latest checkpoint in a directory, using a single look at the image - -``` -$ python guess.py --model_dir /home/dpressel/dev/work/AgeGenderDeepLearning/Folds/tf/age_test_fold_is_1/run-20854 --filename /home/dpressel/Downloads/portraits/prince.jpg --single_look -``` - -Here is a version using gender, where we restore the checkpoint from a specific step: - -``` -$ python guess.py --model_dir /home/dpressel/dev/work/AgeGenderDeepLearning/Folds/tf/gen_test_fold_is_0/run-31376 --class_type gender --requested_step 9999 --filename /home/dpressel/Downloads/portraits/prince.jpg -``` - -#### Face Detection - -If you have an image with one or more frontal faces, you can run a face-detector upfront, and each detected face will be chipped out and run through classification individually. 
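To make the multi-pass averaging described under Running above concrete: a minimal sketch, not the project's actual implementation (guess.py does this internally via make_multi_crop_batch). `classify_fn` is a hypothetical stand-in for a session run that returns a softmax probability vector for one crop:

```
import numpy as np

CROP = 227  # network input size (RESIZE_FINAL in guess.py)

def multi_crop_average(img, classify_fn, crop=CROP):
    # Average predictions over the four corners and the center crop,
    # plus the horizontal flip of each (the "12-look" variant also adds
    # a resized whole-image look and its flip).
    h, w = img.shape[:2]
    offsets = [(0, 0), (0, w - crop), (h - crop, 0), (h - crop, w - crop),
               ((h - crop) // 2, (w - crop) // 2)]
    probs = []
    for y, x in offsets:
        patch = img[y:y + crop, x:x + crop]
        probs.append(classify_fn(patch))
        probs.append(classify_fn(patch[:, ::-1]))  # horizontal flip
    return np.mean(probs, axis=0)
```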
A variety of face detectors are supported, including OpenCV, dlib, and YOLO. - -OpenCV: - -``` -python guess.py --model_type inception --model_dir /data/xdata/rude-carnie/checkpoints/age/inception/22801 --filename /home/dpressel/Downloads/portraits/p_and_d.jpg --face_detection_model /usr/share/opencv/haarcascades/haarcascade_frontalface_default.xml -``` - -To use dlib, you will need to install it and download the model: - -``` -wget http://dlib.net/files/shape_predictor_68_face_landmarks.dat.bz2 -bunzip2 shape_predictor_68_face_landmarks.dat.bz2 -pip install dlib -python guess.py --model_type inception --model_dir /data/xdata/rude-carnie/checkpoints/age/inception/22801 --filename ~/Downloads/portraits/halloween15.jpg --face_detection_type dlib --face_detection_model shape_predictor_68_face_landmarks.dat -``` - -YOLO tiny: - -``` -python guess.py --model_type inception --model_dir /data/xdata/rude-carnie/checkpoints/age/inception/22801 --filename /home/dpressel/Downloads/portraits/p_and_d.jpg --face_detection_model weights/YOLO_tiny.ckpt --face_detection_type yolo_tiny -``` - -If you want to run YOLO, get the tiny checkpoint from here: - -https://github.com/gliese581gg/YOLO_tensorflow/ - -The YOLO detection code is based heavily on: - -https://github.com/gliese581gg/YOLO_tensorflow/blob/master/YOLO_tiny_tf.py - -#### Prediction with fine-tuned inception model - -If you want to use guess.py with an inception fine-tuned model, the usage is the same as above, but remember to pass _--model_type inception_: - -``` -$ python guess.py --model_type inception --model_dir /data/xdata/rude-carnie/checkpoints/age/inception/22801 --filename /home/dpressel/Downloads/portraits/prince.jpg - -``` - -Here is a gender guess: - -``` -$ python guess.py --class_type gender --model_type inception --model_dir /data/xdata/rude-carnie/checkpoints/gender/inception/21936/ --filename /home/dpressel/Downloads/portraits/Dan-Pressel-3.png -I tensorflow/stream_executor/dso_loader.cc:135] successfully opened CUDA library libcublas.so.7.5 locally -I tensorflow/stream_executor/dso_loader.cc:135] successfully opened CUDA library libcudnn.so.5 locally -I tensorflow/stream_executor/dso_loader.cc:135] successfully opened CUDA library libcufft.so.7.5 locally -I tensorflow/stream_executor/dso_loader.cc:135] successfully opened CUDA library libcuda.so.1 locally -I tensorflow/stream_executor/dso_loader.cc:135] successfully opened CUDA library libcurand.so.7.5 locally -... -Executing on /cpu:0 -selected (fine-tuning) inception model -/data/xdata/rude-carnie/checkpoints/gender/inception/21936/checkpoint-14999 -I tensorflow/core/common_runtime/gpu/gpu_device.cc:975] Creating TensorFlow device (/gpu:0) -> (device: 0, name: GeForce GTX 980M, pci bus id: 0000:01:00.0) -Running file /home/dpressel/Downloads/portraits/Dan-Pressel-3.png -Converting PNG to JPEG for /home/dpressel/Downloads/portraits/Dan-Pressel-3.png -Running multi-cropped image -Guess @ 1 M, prob = 0.99 - -``` - -### Pre-trained Checkpoints -You can find a pre-trained age checkpoint for inception here: - -https://drive.google.com/drive/folders/0B8N1oYmGLVGWbDZ4Y21GLWxtV1E - -A pre-trained gender checkpoint for inception is available here: - -https://drive.google.com/drive/folders/0B8N1oYmGLVGWemZQd3JMOEZvdGs - -(A quick way to sanity-check an unpacked checkpoint directory is sketched just below.) - -### Training - -You can use your own training data if you wish.
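Before preparing your own data, note that an unpacked pre-trained checkpoint directory from the links above can be sanity-checked with TensorFlow's checkpoint-state helper (the same call that get_checkpoint in model.py relies on). A minimal sketch, assuming the TF 1.x API this codebase uses and a hypothetical unpack path:

```
import tensorflow as tf  # TF 1.x API, as used throughout this codebase

# Hypothetical local path where the Drive folder was unpacked.
ckpt = tf.train.get_checkpoint_state('/data/xdata/rude-carnie/checkpoints/age/inception/22801')
if ckpt and ckpt.model_checkpoint_path:
    print('latest checkpoint:', ckpt.model_checkpoint_path)
else:
    print('no checkpoint state found; check the unpack path')
```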
This is a little easier to do with gender, since there are many ways that you could come up with a training set for this, but it has been developed specifically with the Adience corpus in mind, and uses the pre-splits created by Levi and Hassner. - -#### Download Adience data and folds - -The Adience data page is here, where you can download the aligned dataset used in the example usage below: - -http://www.openu.ac.il/home/hassner/Adience/data.html - -Get the folds; we don't need to run their preprocessing scripts, since preproc.py does this work using TensorFlow: - -``` -git clone https://github.com/GilLevi/AgeGenderDeepLearning -``` - -#### Pre-process data for training - -First, you will need to preprocess the data using preproc.py. This expects a data directory passed as an absolute path, along with list files naming the training images and their labels, the validation data, and the test data if applicable. The preproc.py program generates 'shards' for each of the datasets, each containing JPEG encoded RGB images of size 256x256: - -``` -$ python preproc.py --fold_dir /home/dpressel/dev/work/AgeGenderDeepLearning/Folds/train_val_txt_files_per_fold/test_fold_is_0 --train_list age_train.txt --valid_list age_val.txt --data_dir /data/xdata/age-gender/aligned --output_dir /home/dpressel/dev/work/AgeGenderDeepLearning/Folds/tf/age_test_fold_is_0 - -``` - -The training (etc.) lists are expected in the --fold_dir, and they contain first the relative path from the --data_dir and second the numeric label (a minimal parser for this format is sketched at the end of this subsection): - -``` -dpressel@dpressel:~/dev/work/3csi-rd/dpressel/sh$ head /home/dpressel/dev/work/AgeGenderDeepLearning/Folds/train_val_txt_files_per_fold/test_fold_is_0/age_train.txt -10069023@N00/landmark_aligned_face.1924.10335948845_0d22490234_o.jpg 5 -7464014@N04/landmark_aligned_face.961.10109081873_8060c8b0a5_o.jpg 4 -28754132@N06/landmark_aligned_face.608.11546494564_2ec3e89568_o.jpg 2 -10543088@N02/landmark_aligned_face.662.10044788254_2091a56ec3_o.jpg 3 -66870968@N06/landmark_aligned_face.1227.11326221064_32114bf26a_o.jpg 4 -7464014@N04/landmark_aligned_face.963.10142314254_8e96a97459_o.jpg 4 -113525713@N07/landmark_aligned_face.1016.11784555666_8d43b6c493_o.jpg 3 -30872264@N00/landmark_aligned_face.603.9575166089_f5f9cecc8c_o.jpg 5 -10897942@N03/landmark_aligned_face.633.10372582914_382144ffe8_o.jpg 3 -10792106@N03/landmark_aligned_face.522.11039121906_b047c90cc1_o.jpg 3 -``` - -Gender is done much the same way: - -``` -$ python preproc.py --fold_dir /home/dpressel/dev/work/AgeGenderDeepLearning/Folds/train_val_txt_files_per_fold/test_fold_is_0 --train_list gender_train.txt --valid_list gender_val.txt --data_dir /data/xdata/age-gender/aligned --output_dir /home/dpressel/dev/work/AgeGenderDeepLearning/Folds/tf/gen_test_fold_is_0 -``` - -#### Train the model (Levi/Hassner) - -Now that we have generated the training and validation shards, we can start training. Here is a simple way to call the driver program to train using SGD with momentum: - -``` -$ python train.py --train_dir /home/dpressel/dev/work/AgeGenderDeepLearning/Folds/tf/age_test_fold_is_0 - -``` - -Once again, gender is done much the same way. Just be careful that you are running on the preprocessed gender data, not the age data.
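As referenced above, here is a minimal sketch of a parser for the fold list format (relative path, then numeric label, whitespace-separated); the paths in the example call are the ones used throughout this README:

```
import os

def read_fold_list(fold_dir, list_name, data_dir):
    # Yield (absolute_image_path, integer_label) pairs from one fold list file.
    with open(os.path.join(fold_dir, list_name)) as f:
        for line in f:
            rel_path, label = line.split()
            yield os.path.join(data_dir, rel_path), int(label)

# Example:
# pairs = read_fold_list(
#     '/home/dpressel/dev/work/AgeGenderDeepLearning/Folds/train_val_txt_files_per_fold/test_fold_is_0',
#     'age_train.txt', '/data/xdata/age-gender/aligned')
```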
Here we use a lower initial learning rate of `0.001`: - -``` - -$ python train.py --train_dir /home/dpressel/dev/work/AgeGenderDeepLearning/Folds/tf/gen_test_fold_is_0 --max_steps 30000 --eta 0.001 - -``` - -#### Train the model (fine-tuned Inception) - -It's also easy to use this codebase to fine-tune a pre-trained inception checkpoint for age or gender detection. Here is an example of how to do this: - -``` -$ python train.py --train_dir /home/dpressel/dev/work/AgeGenderDeepLearning/Folds/tf/age_test_fold_is_0 --max_steps 15000 --model_type inception --batch_size 32 --eta 0.001 --dropout 0.5 --pre_model /data/pre-trained/inception_v3.ckpt -``` - -You can get the inception_v3.ckpt like so: - -``` -$ wget http://download.tensorflow.org/models/inception_v3_2016_08_28.tar.gz -``` - -#### Monitoring the training - -You can easily monitor the running job by launching tensorboard with the --logdir specified in the program's initial output: - -``` -tensorboard --logdir /home/dpressel/dev/work/AgeGenderDeepLearning/Folds/tf/gen_test_fold_is_0/run-31376 - -``` -Then navigate to http://127.0.0.1:6006/ in your browser to see results. The first tab (events) shows the loss over time, and the second shows the images that the network is seeing during training on batches. - -#### Evaluate the model - -The evaluation program is written to be run alongside training or after the fact. If you run it after the fact, you can specify a list of checkpoint steps to evaluate in sequence. If you run it while training is in progress, it will periodically rerun itself on the latest checkpoint. - -Here is an example of running evaluation continuously. The --run_id will live in the --train_dir (run-<id>) and is the product of a single run of training (the id is actually the PID used in training): - -``` -$ python eval.py --run_id 15918 --train_dir /home/dpressel/dev/work/AgeGenderDeepLearning/Folds/tf/gen_test_fold_is_0/ --eval_dir /home/dpressel/dev/work/AgeGenderDeepLearning/Folds/tf/eval_gen_test_fold_is_0 - -``` - -Here is an after-the-fact run of eval that loops over the specified checkpoints and evaluates the performance on each: - -``` -$ python eval.py --run_id 25079 --train_dir /home/dpressel/dev/work/AgeGenderDeepLearning/Folds/tf/age_test_fold_is_0/ --eval_dir /home/dpressel/dev/work/AgeGenderDeepLearning/Folds/tf/eval_age_test_fold_is_0 --requested_step_seq 7000,8000,9000,9999 -``` - -To monitor the fine-tuning of an inception model, the call is much the same.
Just be sure to pass _--model_type inception_: - -``` -$ python eval.py --run_id 8128 --train_dir /home/dpressel/dev/work/AgeGenderDeepLearning/Folds/tf/age_test_fold_is_0/ --eval_dir /home/dpressel/dev/work/AgeGenderDeepLearning/Folds/tf/eval_age_test_fold_is_0 --model_type inception -``` diff --git a/embedding-calculator/srcext/rude_carnie/data.py b/embedding-calculator/srcext/rude_carnie/data.py deleted file mode 100644 index 67f5b661fb..0000000000 --- a/embedding-calculator/srcext/rude_carnie/data.py +++ /dev/null @@ -1,235 +0,0 @@ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from datetime import datetime -import os -import numpy as np -import tensorflow as tf - -from distutils.version import LooseVersion - -VERSION_GTE_0_12_0 = LooseVersion(tf.__version__) >= LooseVersion('0.12.0') - -# Name change in TF v 0.12.0 -if VERSION_GTE_0_12_0: - standardize_image = tf.image.per_image_standardization -else: - standardize_image = tf.image.per_image_whitening - -def data_files(data_dir, subset): - """Returns a python list of all (sharded) data subset files. - Returns: - python list of all (sharded) data set files. - Raises: - ValueError: if there are no data_files matching the subset. - """ - if subset not in ['train', 'validation']: - print('Invalid subset!') - exit(-1) - - tf_record_pattern = os.path.join(data_dir, '%s-*' % subset) - data_files = tf.gfile.Glob(tf_record_pattern) - print(data_files) - if not data_files: - print('No files found for subset %s in data dir %s' % (subset, data_dir)) - - exit(-1) - return data_files - -def decode_jpeg(image_buffer, scope=None): - """Decode a JPEG string into one 3-D float image Tensor. - Args: - image_buffer: scalar string Tensor. - scope: Optional scope for op_scope. - Returns: - 3-D float Tensor with values ranging from [0, 1). - """ - with tf.op_scope([image_buffer], scope, 'decode_jpeg'): - # Decode the string as an RGB JPEG. - # Note that the resulting image contains an unknown height and width - # that is set dynamically by decode_jpeg. In other words, the height - # and width of image is unknown at compile-time. - image = tf.image.decode_jpeg(image_buffer, channels=3) - - # After this point, all image pixels reside in [0,1) - # until the very end, when they're rescaled to (-1, 1). The various - # adjust_* ops all require this range for dtype float. - image = tf.image.convert_image_dtype(image, dtype=tf.float32) - return image - -def distort_image(image, height, width): - - # Image processing for training the network. Note the many random - # distortions applied to the image. - - distorted_image = tf.random_crop(image, [height, width, 3]) - - #distorted_image = tf.image.resize_images(image, [height, width]) - - # Randomly flip the image horizontally. - distorted_image = tf.image.random_flip_left_right(distorted_image) - - # Because these operations are not commutative, consider randomizing - # the order of their operation.
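# random_brightness below perturbs pixel values by a delta drawn from
# [-max_delta, max_delta], and random_contrast scales contrast by a factor
# drawn from [lower, upper]; as the comment above notes, they still run in a
# fixed order here.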
- - distorted_image = tf.image.random_brightness(distorted_image, - max_delta=63) - - distorted_image = tf.image.random_contrast(distorted_image, - lower=0.2, upper=1.8) - - return distorted_image - - -def _is_tensor(x): - return isinstance(x, (tf.Tensor, tf.Variable)) - -def eval_image(image, height, width): - return tf.image.resize_images(image, [height, width]) - -def data_normalization(image): - - image = standardize_image(image) - - return image - -def image_preprocessing(image_buffer, image_size, train, thread_id=0): - """Decode and preprocess one image for evaluation or training. - Args: - image_buffer: JPEG encoded string Tensor - train: boolean - thread_id: integer indicating preprocessing thread - Returns: - 3-D float Tensor containing an appropriately scaled image - Raises: - ValueError: if user does not provide bounding box - """ - - image = decode_jpeg(image_buffer) - - if train: - image = distort_image(image, image_size, image_size) - else: - image = eval_image(image, image_size, image_size) - - image = data_normalization(image) - return image - - -def parse_example_proto(example_serialized): - # Dense features in Example proto. - feature_map = { - 'image/encoded': tf.FixedLenFeature([], dtype=tf.string, - default_value=''), - 'image/filename': tf.FixedLenFeature([], dtype=tf.string, - default_value=''), - - 'image/class/label': tf.FixedLenFeature([1], dtype=tf.int64, - default_value=-1), - 'image/class/text': tf.FixedLenFeature([], dtype=tf.string, - default_value=''), - 'image/height': tf.FixedLenFeature([1], dtype=tf.int64, - default_value=-1), - 'image/width': tf.FixedLenFeature([1], dtype=tf.int64, - default_value=-1), - - } - - features = tf.parse_single_example(example_serialized, feature_map) - label = tf.cast(features['image/class/label'], dtype=tf.int32) - return features['image/encoded'], label, features['image/filename'] - -def batch_inputs(data_dir, batch_size, image_size, train, num_preprocess_threads=4, - num_readers=1, input_queue_memory_factor=16): - with tf.name_scope('batch_processing'): - - if train: - files = data_files(data_dir, 'train') - filename_queue = tf.train.string_input_producer(files, - shuffle=True, - capacity=16) - else: - files = data_files(data_dir, 'validation') - filename_queue = tf.train.string_input_producer(files, - shuffle=False, - capacity=1) - if num_preprocess_threads % 4: - raise ValueError('Please make num_preprocess_threads a multiple ' - 'of 4 (%d % 4 != 0).', num_preprocess_threads) - - if num_readers < 1: - raise ValueError('Please make num_readers at least 1') - - # Approximate number of examples per shard. - examples_per_shard = 1024 - # Size the random shuffle queue to balance between good global - # mixing (more examples) and memory use (fewer examples). - # 1 image uses 299*299*3*4 bytes = 1MB - # The default input_queue_memory_factor is 16 implying a shuffling queue - # size: examples_per_shard * 16 * 1MB = 17.6GB - min_queue_examples = examples_per_shard * input_queue_memory_factor - if train: - examples_queue = tf.RandomShuffleQueue( - capacity=min_queue_examples + 3 * batch_size, - min_after_dequeue=min_queue_examples, - dtypes=[tf.string]) - else: - examples_queue = tf.FIFOQueue( - capacity=examples_per_shard + 3 * batch_size, - dtypes=[tf.string]) - - # Create multiple readers to populate the queue of examples. 
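# With num_readers > 1, each TFRecordReader below feeds serialized examples
# into the shared examples_queue through a QueueRunner, interleaving reads
# across shards; with a single reader, serialized examples are consumed
# directly from the filename queue.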
- if num_readers > 1: - enqueue_ops = [] - for _ in range(num_readers): - reader = tf.TFRecordReader() - _, value = reader.read(filename_queue) - enqueue_ops.append(examples_queue.enqueue([value])) - - tf.train.queue_runner.add_queue_runner( - tf.train.queue_runner.QueueRunner(examples_queue, enqueue_ops)) - example_serialized = examples_queue.dequeue() - else: - reader = tf.TFRecordReader() - _, example_serialized = reader.read(filename_queue) - - images_labels_fnames = [] - for thread_id in range(num_preprocess_threads): - # Parse a serialized Example proto to extract the image and metadata. - image_buffer, label_index, fname = parse_example_proto(example_serialized) - - image = image_preprocessing(image_buffer, image_size, train, thread_id) - images_labels_fnames.append([image, label_index, fname]) - - images, label_index_batch, fnames = tf.train.batch_join( - images_labels_fnames, - batch_size=batch_size, - capacity=2 * num_preprocess_threads * batch_size) - - images = tf.cast(images, tf.float32) - images = tf.reshape(images, shape=[batch_size, image_size, image_size, 3]) - - # Display the training images in the visualizer. - tf.summary.image('images', images, 20) - - return images, tf.reshape(label_index_batch, [batch_size]), fnames - -def inputs(data_dir, batch_size=128, image_size=227, train=False, num_preprocess_threads=4): - with tf.device('/cpu:0'): - images, labels, filenames = batch_inputs( - data_dir, batch_size, image_size, train, - num_preprocess_threads=num_preprocess_threads, - num_readers=1) - return images, labels, filenames - -def distorted_inputs(data_dir, batch_size=128, image_size=227, num_preprocess_threads=4): - - # Force all input processing onto CPU in order to reserve the GPU for - # the forward inference and back-propagation. 
- with tf.device('/cpu:0'): - images, labels, filenames = batch_inputs( - data_dir, batch_size, image_size, train=True, - num_preprocess_threads=num_preprocess_threads, - num_readers=1) - return images, labels, filenames diff --git a/embedding-calculator/srcext/rude_carnie/detect.py b/embedding-calculator/srcext/rude_carnie/detect.py deleted file mode 100644 index 68e3e11101..0000000000 --- a/embedding-calculator/srcext/rude_carnie/detect.py +++ /dev/null @@ -1,56 +0,0 @@ -import numpy as np -import cv2 -FACE_PAD = 50 - -class ObjectDetector(object): - def __init__(self): - pass - - def run(self, image_file): - pass - -# OpenCV's cascade object detector -class ObjectDetectorCascadeOpenCV(ObjectDetector): - def __init__(self, model_name, basename='frontal-face', tgtdir='.', min_height_dec=20, min_width_dec=20, - min_height_thresh=50, min_width_thresh=50): - self.min_height_dec = min_height_dec - self.min_width_dec = min_width_dec - self.min_height_thresh = min_height_thresh - self.min_width_thresh = min_width_thresh - self.tgtdir = tgtdir - self.basename = basename - self.face_cascade = cv2.CascadeClassifier(model_name) - - def run(self, image_file): - print(image_file) - img = cv2.imread(image_file) - min_h = int(max(img.shape[0] / self.min_height_dec, self.min_height_thresh)) - min_w = int(max(img.shape[1] / self.min_width_dec, self.min_width_thresh)) - gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) - faces = self.face_cascade.detectMultiScale(gray, 1.3, minNeighbors=5, minSize=(min_h, min_w)) - - images = [] - for i, (x, y, w, h) in enumerate(faces): - images.append(self.sub_image('%s/%s-%d.jpg' % (self.tgtdir, self.basename, i + 1), img, x, y, w, h)) - - print('%d faces detected' % len(images)) - - for (x, y, w, h) in faces: - self.draw_rect(img, x, y, w, h) - # Fix in case nothing found in the image - outfile = '%s/%s.jpg' % (self.tgtdir, self.basename) - cv2.imwrite(outfile, img) - return images, outfile - - def sub_image(self, name, img, x, y, w, h): - upper_cut = [min(img.shape[0], y + h + FACE_PAD), min(img.shape[1], x + w + FACE_PAD)] - lower_cut = [max(y - FACE_PAD, 0), max(x - FACE_PAD, 0)] - roi_color = img[lower_cut[0]:upper_cut[0], lower_cut[1]:upper_cut[1]] - cv2.imwrite(name, roi_color) - return name - - def draw_rect(self, img, x, y, w, h): - upper_cut = [min(img.shape[0], y + h + FACE_PAD), min(img.shape[1], x + w + FACE_PAD)] - lower_cut = [max(y - FACE_PAD, 0), max(x - FACE_PAD, 0)] - cv2.rectangle(img, (lower_cut[1], lower_cut[0]), (upper_cut[1], upper_cut[0]), (255, 0, 0), 2) - diff --git a/embedding-calculator/srcext/rude_carnie/dlibdetect.py b/embedding-calculator/srcext/rude_carnie/dlibdetect.py deleted file mode 100644 index 7d05a4fc21..0000000000 --- a/embedding-calculator/srcext/rude_carnie/dlibdetect.py +++ /dev/null @@ -1,48 +0,0 @@ -from detect import ObjectDetector - -import dlib -import cv2 -FACE_PAD = 50 - -class FaceDetectorDlib(ObjectDetector): - def __init__(self, model_name, basename='frontal-face', tgtdir='.'): - self.tgtdir = tgtdir - self.basename = basename - self.detector = dlib.get_frontal_face_detector() - self.predictor = dlib.shape_predictor(model_name) - - def run(self, image_file): - print(image_file) - img = cv2.imread(image_file) - gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) - faces = self.detector(gray, 1) - images = [] - bb = [] - for (i, rect) in enumerate(faces): - x = rect.left() - y = rect.top() - w = rect.right() - x - h = rect.bottom() - y - bb.append((x,y,w,h)) - images.append(self.sub_image('%s/%s-%d.jpg' % (self.tgtdir, 
self.basename, i + 1), img, x, y, w, h)) - - print('%d faces detected' % len(images)) - - for (x, y, w, h) in bb: - self.draw_rect(img, x, y, w, h) - # Fix in case nothing found in the image - outfile = '%s/%s.jpg' % (self.tgtdir, self.basename) - cv2.imwrite(outfile, img) - return images, outfile - - def sub_image(self, name, img, x, y, w, h): - upper_cut = [min(img.shape[0], y + h + FACE_PAD), min(img.shape[1], x + w + FACE_PAD)] - lower_cut = [max(y - FACE_PAD, 0), max(x - FACE_PAD, 0)] - roi_color = img[lower_cut[0]:upper_cut[0], lower_cut[1]:upper_cut[1]] - cv2.imwrite(name, roi_color) - return name - - def draw_rect(self, img, x, y, w, h): - upper_cut = [min(img.shape[0], y + h + FACE_PAD), min(img.shape[1], x + w + FACE_PAD)] - lower_cut = [max(y - FACE_PAD, 0), max(x - FACE_PAD, 0)] - cv2.rectangle(img, (lower_cut[1], lower_cut[0]), (upper_cut[1], upper_cut[0]), (255, 0, 0), 2) diff --git a/embedding-calculator/srcext/rude_carnie/eval.py b/embedding-calculator/srcext/rude_carnie/eval.py deleted file mode 100644 index 88116c1279..0000000000 --- a/embedding-calculator/srcext/rude_carnie/eval.py +++ /dev/null @@ -1,194 +0,0 @@ -""" -At each tick, evaluate the latest checkpoint against some validation data. -Or, you can run once by passing --run_once, or you can pass a --requested_step_seq of comma-separated checkpoint numbers (which must already exist) to run in a row. - -This program expects a training base directory with the data and an md.json file. -There will be sub-directories for each run underneath with the name run-<pid>, -where <pid> is the training program's process ID. To run this program, you -will need to pass --train_dir, which is the base path name, --run_id, -and, if you are using a custom name for your checkpoint, you should -pass that as well (most times you probably won't). This will yield a model path: -<train_dir>/run-<run_id>/checkpoint - -Note: If you are training and want to use the same GPU, you can supposedly -suspend the training process. I have not found this works reliably on my Linux machine. -Instead, I have found that, often times, the GPU will not reclaim the resources -and in that case, your eval may run out of GPU memory. - -You can alternately run training for a number of steps, break the program -and run this, then restart training from the old checkpoint. I also -found this inconvenient. In order to control this better, the program -requires explicit placement of inference. It defaults to the CPU -so that it can easily run side by side with training. This does make it -much slower than if it was on the GPU, but for evaluation this may not be -a major problem.
To place on the GPU, just pass --device_id /gpu:<id>, where <id> is the GPU ID. - -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from datetime import datetime -import math -import time -from .data import inputs -import numpy as np -import tensorflow as tf -from .model import select_model, get_checkpoint -import os -import json - -tf.app.flags.DEFINE_string('train_dir', '/home/dpressel/dev/work/AgeGenderDeepLearning/Folds/tf/test_fold_is_0', - 'Training directory (where training data lives)') - -tf.app.flags.DEFINE_integer('run_id', 0, - 'This is the run number (pid) for training proc') - -tf.app.flags.DEFINE_string('device_id', '/cpu:0', - 'What processing unit to execute inference on') - -tf.app.flags.DEFINE_string('eval_dir', '/home/dpressel/dev/work/AgeGenderDeepLearning/Folds/tf/eval_test_fold_is_0', - 'Directory to put output to') - -tf.app.flags.DEFINE_string('eval_data', 'valid', - 'Data type (valid|train)') - -tf.app.flags.DEFINE_integer('num_preprocess_threads', 4, - 'Number of preprocessing threads') - -tf.app.flags.DEFINE_integer('eval_interval_secs', 60 * 5, - """How often to run the eval.""") -tf.app.flags.DEFINE_integer('num_examples', 10000, - """Number of examples to run.""") -tf.app.flags.DEFINE_boolean('run_once', False, - """Whether to run eval only once.""") - -tf.app.flags.DEFINE_integer('image_size', 227, - 'Image size') - -tf.app.flags.DEFINE_integer('batch_size', 128, - 'Batch size') - -tf.app.flags.DEFINE_string('checkpoint', 'checkpoint', - 'Checkpoint basename') - - -tf.app.flags.DEFINE_string('model_type', 'default', - 'Type of convnet') - -tf.app.flags.DEFINE_string('requested_step_seq', '', 'Requested step to restore') -FLAGS = tf.app.flags.FLAGS - - - -def eval_once(saver, summary_writer, summary_op, logits, labels, num_eval, requested_step=None): - """Run Eval once. - Args: - saver: Saver. - summary_writer: Summary writer. - summary_op: Summary op. - logits: Logits tensor. - labels: Ground-truth labels tensor. - num_eval: Number of examples to evaluate. - """ - top1 = tf.nn.in_top_k(logits, labels, 1) - top2 = tf.nn.in_top_k(logits, labels, 2) - - with tf.Session() as sess: - checkpoint_path = '%s/run-%d' % (FLAGS.train_dir, FLAGS.run_id) - - model_checkpoint_path, global_step = get_checkpoint(checkpoint_path, requested_step, FLAGS.checkpoint) - - saver.restore(sess, model_checkpoint_path) - - # Start the queue runners. - coord = tf.train.Coordinator() - try: - threads = [] - for qr in tf.get_collection(tf.GraphKeys.QUEUE_RUNNERS): - threads.extend(qr.create_threads(sess, coord=coord, daemon=True, - start=True)) - num_steps = int(math.ceil(num_eval / FLAGS.batch_size)) - true_count1 = true_count2 = 0 - total_sample_count = num_steps * FLAGS.batch_size - step = 0 - print(FLAGS.batch_size, num_steps) - - while step < num_steps and not coord.should_stop(): - start_time = time.time() - v, predictions1, predictions2 = sess.run([logits, top1, top2]) - duration = time.time() - start_time - sec_per_batch = float(duration) - examples_per_sec = FLAGS.batch_size / sec_per_batch - - true_count1 += np.sum(predictions1) - true_count2 += np.sum(predictions2) - format_str = ('%s (%.1f examples/sec; %.3f sec/batch)') - print(format_str % (datetime.now(), - examples_per_sec, sec_per_batch)) - - step += 1 - - # Compute precision @ 1.
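# in_top_k(logits, labels, k) marks an example correct when its true label is
# among the k highest-scoring logits, so at1 below is top-1 accuracy and at2
# is top-2 accuracy (printed as 'recall @ 2').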
- - at1 = true_count1 / total_sample_count - at2 = true_count2 / total_sample_count - print('%s: precision @ 1 = %.3f (%d/%d)' % (datetime.now(), at1, true_count1, total_sample_count)) - print('%s: recall @ 2 = %.3f (%d/%d)' % (datetime.now(), at2, true_count2, total_sample_count)) - - summary = tf.Summary() - summary.ParseFromString(sess.run(summary_op)) - summary.value.add(tag='Precision @ 1', simple_value=at1) - summary.value.add(tag=' Recall @ 2', simple_value=at2) - summary_writer.add_summary(summary, global_step) - except Exception as e: # pylint: disable=broad-except - coord.request_stop(e) - - coord.request_stop() - coord.join(threads, stop_grace_period_secs=10) - -def evaluate(run_dir): - with tf.Graph().as_default() as g: - input_file = os.path.join(FLAGS.train_dir, 'md.json') - print(input_file) - with open(input_file, 'r') as f: - md = json.load(f) - - eval_data = FLAGS.eval_data == 'valid' - num_eval = md['%s_counts' % FLAGS.eval_data] - - model_fn = select_model(FLAGS.model_type) - - - with tf.device(FLAGS.device_id): - print('Executing on %s' % FLAGS.device_id) - images, labels, _ = inputs(FLAGS.train_dir, FLAGS.batch_size, FLAGS.image_size, train=not eval_data, num_preprocess_threads=FLAGS.num_preprocess_threads) - logits = model_fn(md['nlabels'], images, 1, False) - summary_op = tf.summary.merge_all() - - summary_writer = tf.summary.FileWriter(run_dir, g) - saver = tf.train.Saver() - - if FLAGS.requested_step_seq: - sequence = FLAGS.requested_step_seq.split(',') - for requested_step in sequence: - print('Running %s' % sequence) - eval_once(saver, summary_writer, summary_op, logits, labels, num_eval, requested_step) - else: - while True: - print('Running loop') - eval_once(saver, summary_writer, summary_op, logits, labels, num_eval) - if FLAGS.run_once: - break - time.sleep(FLAGS.eval_interval_secs) - - -def main(argv=None): # pylint: disable=unused-argument - run_dir = '%s/run-%d' % (FLAGS.eval_dir, FLAGS.run_id) - if tf.gfile.Exists(run_dir): - tf.gfile.DeleteRecursively(run_dir) - tf.gfile.MakeDirs(run_dir) - evaluate(run_dir) - - -if __name__ == '__main__': - tf.app.run() diff --git a/embedding-calculator/srcext/rude_carnie/export.py b/embedding-calculator/srcext/rude_carnie/export.py deleted file mode 100644 index 38b3b9f53a..0000000000 --- a/embedding-calculator/srcext/rude_carnie/export.py +++ /dev/null @@ -1,133 +0,0 @@ -import tensorflow as tf -from .model import select_model, get_checkpoint -from .utils import RESIZE_AOI, RESIZE_FINAL -from tensorflow.python.framework import graph_util -from tensorflow.contrib.learn.python.learn.utils import export -from tensorflow.python.saved_model import builder as saved_model_builder -from tensorflow.python.saved_model import signature_constants -from tensorflow.python.saved_model import signature_def_utils -from tensorflow.python.saved_model import tag_constants -from tensorflow.python.saved_model import utils - -import os - -GENDER_LIST =['M','F'] -AGE_LIST = ['(0, 2)','(4, 6)','(8, 12)','(15, 20)','(25, 32)','(38, 43)','(48, 53)','(60, 100)'] - -tf.app.flags.DEFINE_string('checkpoint', 'checkpoint', - 'Checkpoint basename') - -tf.app.flags.DEFINE_string('class_type', 'age', - 'Classification type (age|gender)') - -tf.app.flags.DEFINE_string('model_dir', '', - 'Model directory (where training data lives)') - -tf.app.flags.DEFINE_integer('model_version', 1, - """Version number of the model.""") - -tf.app.flags.DEFINE_string('output_dir', '/tmp/tf_exported_model/0', - 'Export directory') - 
-tf.app.flags.DEFINE_string('model_type', 'default', - 'Type of convnet') - -tf.app.flags.DEFINE_string('requested_step', '', 'Within the model directory, a requested step to restore e.g., 9000') - -FLAGS = tf.app.flags.FLAGS - -def preproc_jpeg(image_buffer): - image = tf.image.decode_jpeg(image_buffer, channels=3) - crop = tf.image.resize_images(image, (RESIZE_AOI, RESIZE_AOI)) - # What?? - crop = tf.image.resize_images(crop, (RESIZE_FINAL, RESIZE_FINAL)) - image_out = tf.image.per_image_standardization(crop) - return image_out - -def main(argv=None): - with tf.Graph().as_default(): - - serialized_tf_example = tf.placeholder(tf.string, name='tf_example') - feature_configs = { - 'image/encoded': tf.FixedLenFeature(shape=[], dtype=tf.string), - } - tf_example = tf.parse_example(serialized_tf_example, feature_configs) - jpegs = tf_example['image/encoded'] - - images = tf.map_fn(preproc_jpeg, jpegs, dtype=tf.float32) - label_list = AGE_LIST if FLAGS.class_type == 'age' else GENDER_LIST - nlabels = len(label_list) - - config = tf.ConfigProto(allow_soft_placement=True) - with tf.Session(config=config) as sess: - - model_fn = select_model(FLAGS.model_type) - logits = model_fn(nlabels, images, 1, False) - softmax_output = tf.nn.softmax(logits) - values, indices = tf.nn.top_k(softmax_output, 2 if FLAGS.class_type == 'age' else 1) - class_tensor = tf.constant(label_list) - table = tf.contrib.lookup.index_to_string_table_from_tensor(class_tensor) - classes = table.lookup(tf.to_int64(indices)) - requested_step = FLAGS.requested_step if FLAGS.requested_step else None - checkpoint_path = '%s' % (FLAGS.model_dir) - model_checkpoint_path, global_step = get_checkpoint(checkpoint_path, requested_step, FLAGS.checkpoint) - - saver = tf.train.Saver() - saver.restore(sess, model_checkpoint_path) - print('Restored model checkpoint %s' % model_checkpoint_path) - - output_path = os.path.join( - tf.compat.as_bytes(FLAGS.output_dir), - tf.compat.as_bytes(str(FLAGS.model_version))) - print('Exporting trained model to %s' % output_path) - builder = tf.saved_model.builder.SavedModelBuilder(output_path) - - # Build the signature_def_map. - classify_inputs_tensor_info = tf.saved_model.utils.build_tensor_info( - serialized_tf_example) - classes_output_tensor_info = tf.saved_model.utils.build_tensor_info( - classes) - scores_output_tensor_info = tf.saved_model.utils.build_tensor_info(values) - classification_signature = ( - tf.saved_model.signature_def_utils.build_signature_def( - inputs={ - tf.saved_model.signature_constants.CLASSIFY_INPUTS: - classify_inputs_tensor_info - }, - outputs={ - tf.saved_model.signature_constants.CLASSIFY_OUTPUT_CLASSES: - classes_output_tensor_info, - tf.saved_model.signature_constants.CLASSIFY_OUTPUT_SCORES: - scores_output_tensor_info - }, - method_name=tf.saved_model.signature_constants. - CLASSIFY_METHOD_NAME)) - - predict_inputs_tensor_info = tf.saved_model.utils.build_tensor_info(jpegs) - prediction_signature = ( - tf.saved_model.signature_def_utils.build_signature_def( - inputs={'images': predict_inputs_tensor_info}, - outputs={ - 'classes': classes_output_tensor_info, - 'scores': scores_output_tensor_info - }, - method_name=tf.saved_model.signature_constants.PREDICT_METHOD_NAME - )) - - legacy_init_op = tf.group(tf.tables_initializer(), name='legacy_init_op') - builder.add_meta_graph_and_variables( - sess, [tf.saved_model.tag_constants.SERVING], - signature_def_map={ - 'predict_images': - prediction_signature, - tf.saved_model.signature_constants. 
- DEFAULT_SERVING_SIGNATURE_DEF_KEY: - classification_signature, - }, - legacy_init_op=legacy_init_op) - - builder.save() - print('Successfully exported model to %s' % FLAGS.output_dir) - -if __name__ == '__main__': - tf.app.run() diff --git a/embedding-calculator/srcext/rude_carnie/filter_by_face.py b/embedding-calculator/srcext/rude_carnie/filter_by_face.py deleted file mode 100644 index 8beddcef49..0000000000 --- a/embedding-calculator/srcext/rude_carnie/filter_by_face.py +++ /dev/null @@ -1,76 +0,0 @@ -import numpy as np -import tensorflow as tf -import os -import cv2 -import time -import sys -from .utils import * -import csv - -# YOLO tiny -#python fd.py --filename /media/dpressel/xdata/insights/converted/ --face_detection_model weights/YOLO_tiny.ckpt --face_detection_type yolo_tiny --target yolo.csv - -# CV2 - -#python fd.py --filename /media/dpressel/xdata/insights/converted/ --face_detection_model /usr/share/opencv/haarcascades/haarcascade_frontalface_default.xml --target cascade.csv - -tf.app.flags.DEFINE_string('filename', '', - 'File (Image) or File list (Text/No header TSV) to process') - -tf.app.flags.DEFINE_string('face_detection_model', '', 'Do frontal face detection with model specified') - -tf.app.flags.DEFINE_string('face_detection_type', 'cascade', 'Face detection model type (yolo_tiny|cascade)') - -tf.app.flags.DEFINE_string('target', None, 'Target file name (defaults to {face_detection_model}.csv') -FACE_PAD = 0 -FLAGS = tf.app.flags.FLAGS - -def list_images(srcfile): - with open(srcfile, 'r') as csvfile: - delim = ',' if srcfile.endswith('.csv') else '\t' - reader = csv.reader(csvfile, delimiter=delim) - if srcfile.endswith('.csv') or srcfile.endswith('.tsv'): - print('skipping header') - _ = next(reader) - - return [row[0] for row in reader] - -def main(argv=None): # pylint: disable=unused-argument - - fd = face_detection_model(FLAGS.face_detection_type, FLAGS.face_detection_model) - files = [] - contains_faces = [] - - target = FLAGS.target = '%s.csv' % FLAGS.face_detection_type if FLAGS.target is None else FLAGS.target - - print('Creating output file %s' % target) - output = open(target, 'w') - writer = csv.writer(output) - writer.writerow(('file_with_face',)) - - if FLAGS.filename is not None: - if os.path.isdir(FLAGS.filename): - for relpath in os.listdir(FLAGS.filename): - abspath = os.path.join(FLAGS.filename, relpath) - if os.path.isfile(abspath) and any([abspath.endswith('.' + ty) for ty in ('jpg', 'png', 'JPG', 'PNG', 'jpeg')]): - print(abspath) - files.append(abspath) - elif any([FLAGS.filename.endswith('.' 
+ ty) for ty in ('csv', 'tsv', 'txt')]): - files = list_images(FLAGS.filename) - else: - files = [FLAGS.filename] - - for f in files: - try: - images, outfile = fd.run(f) - if len(images): - print(f, 'YES') - writer.writerow((f,)) - contains_faces.append(f) - else: - print(f, 'NO') - except Exception as e: - print(e) - -if __name__=='__main__': - tf.app.run() diff --git a/embedding-calculator/srcext/rude_carnie/guess.py b/embedding-calculator/srcext/rude_carnie/guess.py deleted file mode 100644 index 39836eed8c..0000000000 --- a/embedding-calculator/srcext/rude_carnie/guess.py +++ /dev/null @@ -1,207 +0,0 @@ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from datetime import datetime -import math -import time -from .data import inputs -import numpy as np -import tensorflow as tf -from .model import select_model, get_checkpoint -from .utils import * -import os -import json -import csv - -RESIZE_FINAL = 227 -GENDER_LIST =['M','F'] -AGE_LIST = ['(0, 2)','(4, 6)','(8, 12)','(15, 20)','(25, 32)','(38, 43)','(48, 53)','(60, 100)'] -MAX_BATCH_SZ = 128 - -tf.app.flags.DEFINE_string('model_dir', '', - 'Model directory (where training data lives)') - -tf.app.flags.DEFINE_string('class_type', 'age', - 'Classification type (age|gender)') - - -tf.app.flags.DEFINE_string('device_id', '/cpu:0', - 'What processing unit to execute inference on') - -tf.app.flags.DEFINE_string('filename', '', - 'File (Image) or File list (Text/No header TSV) to process') - -tf.app.flags.DEFINE_string('target', '', - 'CSV file containing the filename processed along with best guess and score') - -tf.app.flags.DEFINE_string('checkpoint', 'checkpoint', - 'Checkpoint basename') - -tf.app.flags.DEFINE_string('model_type', 'default', - 'Type of convnet') - -tf.app.flags.DEFINE_string('requested_step', '', 'Within the model directory, a requested step to restore e.g., 9000') - -tf.app.flags.DEFINE_boolean('single_look', False, 'single look at the image or multiple crops') - -tf.app.flags.DEFINE_string('face_detection_model', '', 'Do frontal face detection with model specified') - -tf.app.flags.DEFINE_string('face_detection_type', 'cascade', 'Face detection model type (yolo_tiny|cascade)') - -FLAGS = tf.app.flags.FLAGS - -def one_of(fname, types): - return any([fname.endswith('.' 
+ ty) for ty in types]) - -def resolve_file(fname): - if os.path.exists(fname): return fname - for suffix in ('.jpg', '.png', '.JPG', '.PNG', '.jpeg'): - cand = fname + suffix - if os.path.exists(cand): - return cand - return None - - -def classify_many_single_crop(sess, label_list, softmax_output, coder, images, image_files, writer): - try: - num_batches = math.ceil(len(image_files) / MAX_BATCH_SZ) - pg = ProgressBar(num_batches) - for j in range(num_batches): - start_offset = j * MAX_BATCH_SZ - end_offset = min((j + 1) * MAX_BATCH_SZ, len(image_files)) - - batch_image_files = image_files[start_offset:end_offset] - print(start_offset, end_offset, len(batch_image_files)) - image_batch = make_multi_image_batch(batch_image_files, coder) - batch_results = sess.run(softmax_output, feed_dict={images:image_batch.eval()}) - batch_sz = batch_results.shape[0] - for i in range(batch_sz): - output_i = batch_results[i] - best_i = np.argmax(output_i) - best_choice = (label_list[best_i], output_i[best_i]) - print('Guess @ 1 %s, prob = %.2f' % best_choice) - if writer is not None: - f = batch_image_files[i] - writer.writerow((f, best_choice[0], '%.2f' % best_choice[1])) - pg.update() - pg.done() - except Exception as e: - print(e) - print('Failed to run all images') - -def classify_one_multi_crop(sess, label_list, softmax_output, coder, images, image_file, writer): - try: - - print('Running file %s' % image_file) - image_batch = make_multi_crop_batch(image_file, coder) - - batch_results = sess.run(softmax_output, feed_dict={images:image_batch.eval()}) - output = batch_results[0] - batch_sz = batch_results.shape[0] - - for i in range(1, batch_sz): - output = output + batch_results[i] - - output /= batch_sz - best = np.argmax(output) - best_choice = (label_list[best], output[best]) - print('Guess @ 1 %s, prob = %.2f' % best_choice) - - nlabels = len(label_list) - if nlabels > 2: - output[best] = 0 - second_best = np.argmax(output) - print('Guess @ 2 %s, prob = %.2f' % (label_list[second_best], output[second_best])) - - if writer is not None: - writer.writerow((image_file, best_choice[0], '%.2f' % best_choice[1])) - except Exception as e: - print(e) - print('Failed to run image %s ' % image_file) - -def list_images(srcfile): - with open(srcfile, 'r') as csvfile: - delim = ',' if srcfile.endswith('.csv') else '\t' - reader = csv.reader(csvfile, delimiter=delim) - if srcfile.endswith('.csv') or srcfile.endswith('.tsv'): - print('skipping header') - _ = next(reader) - - return [row[0] for row in reader] - -def main(argv=None): # pylint: disable=unused-argument - - files = [] - - if FLAGS.face_detection_model: - print('Using face detector (%s) %s' % (FLAGS.face_detection_type, FLAGS.face_detection_model)) - face_detect = face_detection_model(FLAGS.face_detection_type, FLAGS.face_detection_model) - face_files, rectangles = face_detect.run(FLAGS.filename) - print(face_files) - files += face_files - - config = tf.ConfigProto(allow_soft_placement=True) - with tf.Session(config=config) as sess: - - label_list = AGE_LIST if FLAGS.class_type == 'age' else GENDER_LIST - nlabels = len(label_list) - - print('Executing on %s' % FLAGS.device_id) - model_fn = select_model(FLAGS.model_type) - - with tf.device(FLAGS.device_id): - - images = tf.placeholder(tf.float32, [None, RESIZE_FINAL, RESIZE_FINAL, 3]) - logits = model_fn(nlabels, images, 1, False) - init = tf.global_variables_initializer() - - requested_step = FLAGS.requested_step if FLAGS.requested_step else None - - checkpoint_path = '%s' % (FLAGS.model_dir) - - 
model_checkpoint_path, global_step = get_checkpoint(checkpoint_path, requested_step, FLAGS.checkpoint) - - saver = tf.train.Saver() - saver.restore(sess, model_checkpoint_path) - - softmax_output = tf.nn.softmax(logits) - - coder = ImageCoder() - - # Support a batch mode if no face detection model - if len(files) == 0: - if (os.path.isdir(FLAGS.filename)): - for relpath in os.listdir(FLAGS.filename): - abspath = os.path.join(FLAGS.filename, relpath) - - if os.path.isfile(abspath) and any([abspath.endswith('.' + ty) for ty in ('jpg', 'png', 'JPG', 'PNG', 'jpeg')]): - print(abspath) - files.append(abspath) - else: - files.append(FLAGS.filename) - # If it happens to be a list file, read the list and clobber the files - if any([FLAGS.filename.endswith('.' + ty) for ty in ('csv', 'tsv', 'txt')]): - files = list_images(FLAGS.filename) - - writer = None - output = None - if FLAGS.target: - print('Creating output file %s' % FLAGS.target) - output = open(FLAGS.target, 'w') - writer = csv.writer(output) - writer.writerow(('file', 'label', 'score')) - image_files = list(filter(lambda x: x is not None, [resolve_file(f) for f in files])) - print(image_files) - if FLAGS.single_look: - classify_many_single_crop(sess, label_list, softmax_output, coder, images, image_files, writer) - - else: - for image_file in image_files: - classify_one_multi_crop(sess, label_list, softmax_output, coder, images, image_file, writer) - - if output is not None: - output.close() - -if __name__ == '__main__': - tf.app.run() diff --git a/embedding-calculator/srcext/rude_carnie/model.py b/embedding-calculator/srcext/rude_carnie/model.py deleted file mode 100644 index efd734f8d1..0000000000 --- a/embedding-calculator/srcext/rude_carnie/model.py +++ /dev/null @@ -1,195 +0,0 @@ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from datetime import datetime -import time -import os -import numpy as np -import tensorflow as tf -from .data import distorted_inputs -import re -from tensorflow.contrib.layers import * - -from tensorflow.contrib.slim.python.slim.nets.inception_v3 import inception_v3_base - - -TOWER_NAME = 'tower' - -def select_model(name): - if name.startswith('inception'): - print('selected (fine-tuning) inception model') - return inception_v3 - elif name == 'bn': - print('selected batch norm model') - return levi_hassner_bn - print('selected default model') - return levi_hassner - - -def get_checkpoint(checkpoint_path, requested_step=None, basename='checkpoint'): - if requested_step is not None: - - model_checkpoint_path = '%s/%s-%s' % (checkpoint_path, basename, requested_step) - if os.path.exists(model_checkpoint_path) is None: - print('No checkpoint file found at [%s]' % checkpoint_path) - exit(-1) - print(model_checkpoint_path) - print(model_checkpoint_path) - return model_checkpoint_path, requested_step - - ckpt = tf.train.get_checkpoint_state(checkpoint_path) - if ckpt and ckpt.model_checkpoint_path: - # Restore checkpoint as described in top of this program - print(ckpt.model_checkpoint_path) - global_step = ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1] - - return ckpt.model_checkpoint_path, global_step - else: - print('No checkpoint file found at [%s]' % checkpoint_path) - exit(-1) - -def _activation_summary(x): - tensor_name = re.sub('%s_[0-9]*/' % TOWER_NAME, '', x.op.name) - tf.summary.histogram(tensor_name + '/activations', x) - tf.summary.scalar(tensor_name + '/sparsity', tf.nn.zero_fraction(x)) - -def inception_v3(nlabels, images, 
pkeep, is_training): - - batch_norm_params = { - "is_training": is_training, - "trainable": True, - # Decay for the moving averages. - "decay": 0.9997, - # Epsilon to prevent 0s in variance. - "epsilon": 0.001, - # Collection containing the moving mean and moving variance. - "variables_collections": { - "beta": None, - "gamma": None, - "moving_mean": ["moving_vars"], - "moving_variance": ["moving_vars"], - } - } - weight_decay = 0.00004 - stddev=0.1 - weights_regularizer = tf.contrib.layers.l2_regularizer(weight_decay) - with tf.variable_scope("InceptionV3", "InceptionV3", [images]) as scope: - - with tf.contrib.slim.arg_scope( - [tf.contrib.slim.conv2d, tf.contrib.slim.fully_connected], - weights_regularizer=weights_regularizer, - trainable=True): - with tf.contrib.slim.arg_scope( - [tf.contrib.slim.conv2d], - weights_initializer=tf.truncated_normal_initializer(stddev=stddev), - activation_fn=tf.nn.relu, - normalizer_fn=batch_norm, - normalizer_params=batch_norm_params): - net, end_points = inception_v3_base(images, scope=scope) - with tf.variable_scope("logits"): - shape = net.get_shape() - net = avg_pool2d(net, shape[1:3], padding="VALID", scope="pool") - net = tf.nn.dropout(net, pkeep, name='droplast') - net = flatten(net, scope="flatten") - - with tf.variable_scope('output') as scope: - - weights = tf.Variable(tf.truncated_normal([2048, nlabels], mean=0.0, stddev=0.01), name='weights') - biases = tf.Variable(tf.constant(0.0, shape=[nlabels], dtype=tf.float32), name='biases') - output = tf.add(tf.matmul(net, weights), biases, name=scope.name) - _activation_summary(output) - return output - -def levi_hassner_bn(nlabels, images, pkeep, is_training): - - batch_norm_params = { - "is_training": is_training, - "trainable": True, - # Decay for the moving averages. - "decay": 0.9997, - # Epsilon to prevent 0s in variance. - "epsilon": 0.001, - # Collection containing the moving mean and moving variance. 
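# slim's batch_norm adds the moving mean/variance variables to the
# 'moving_vars' collection named below, so they can be fetched later via
# tf.get_collection('moving_vars').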
- "variables_collections": { - "beta": None, - "gamma": None, - "moving_mean": ["moving_vars"], - "moving_variance": ["moving_vars"], - } - } - weight_decay = 0.0005 - weights_regularizer = tf.contrib.layers.l2_regularizer(weight_decay) - - with tf.variable_scope("LeviHassnerBN", "LeviHassnerBN", [images]) as scope: - - with tf.contrib.slim.arg_scope( - [convolution2d, fully_connected], - weights_regularizer=weights_regularizer, - biases_initializer=tf.constant_initializer(1.), - weights_initializer=tf.random_normal_initializer(stddev=0.005), - trainable=True): - with tf.contrib.slim.arg_scope( - [convolution2d], - weights_initializer=tf.random_normal_initializer(stddev=0.01), - normalizer_fn=batch_norm, - normalizer_params=batch_norm_params): - - conv1 = convolution2d(images, 96, [7,7], [4, 4], padding='VALID', biases_initializer=tf.constant_initializer(0.), scope='conv1') - pool1 = max_pool2d(conv1, 3, 2, padding='VALID', scope='pool1') - conv2 = convolution2d(pool1, 256, [5, 5], [1, 1], padding='SAME', scope='conv2') - pool2 = max_pool2d(conv2, 3, 2, padding='VALID', scope='pool2') - conv3 = convolution2d(pool2, 384, [3, 3], [1, 1], padding='SAME', biases_initializer=tf.constant_initializer(0.), scope='conv3') - pool3 = max_pool2d(conv3, 3, 2, padding='VALID', scope='pool3') - # can use tf.contrib.layer.flatten - flat = tf.reshape(pool3, [-1, 384*6*6], name='reshape') - full1 = fully_connected(flat, 512, scope='full1') - drop1 = tf.nn.dropout(full1, pkeep, name='drop1') - full2 = fully_connected(drop1, 512, scope='full2') - drop2 = tf.nn.dropout(full2, pkeep, name='drop2') - - with tf.variable_scope('output') as scope: - - weights = tf.Variable(tf.random_normal([512, nlabels], mean=0.0, stddev=0.01), name='weights') - biases = tf.Variable(tf.constant(0.0, shape=[nlabels], dtype=tf.float32), name='biases') - output = tf.add(tf.matmul(drop2, weights), biases, name=scope.name) - - return output - -def levi_hassner(nlabels, images, pkeep, is_training): - - weight_decay = 0.0005 - weights_regularizer = tf.contrib.layers.l2_regularizer(weight_decay) - with tf.variable_scope("LeviHassner", "LeviHassner", [images]) as scope: - - with tf.contrib.slim.arg_scope( - [convolution2d, fully_connected], - weights_regularizer=weights_regularizer, - biases_initializer=tf.constant_initializer(1.), - weights_initializer=tf.random_normal_initializer(stddev=0.005), - trainable=True): - with tf.contrib.slim.arg_scope( - [convolution2d], - weights_initializer=tf.random_normal_initializer(stddev=0.01)): - - conv1 = convolution2d(images, 96, [7,7], [4, 4], padding='VALID', biases_initializer=tf.constant_initializer(0.), scope='conv1') - pool1 = max_pool2d(conv1, 3, 2, padding='VALID', scope='pool1') - norm1 = tf.nn.local_response_normalization(pool1, 5, alpha=0.0001, beta=0.75, name='norm1') - conv2 = convolution2d(norm1, 256, [5, 5], [1, 1], padding='SAME', scope='conv2') - pool2 = max_pool2d(conv2, 3, 2, padding='VALID', scope='pool2') - norm2 = tf.nn.local_response_normalization(pool2, 5, alpha=0.0001, beta=0.75, name='norm2') - conv3 = convolution2d(norm2, 384, [3, 3], [1, 1], biases_initializer=tf.constant_initializer(0.), padding='SAME', scope='conv3') - pool3 = max_pool2d(conv3, 3, 2, padding='VALID', scope='pool3') - flat = tf.reshape(pool3, [-1, 384*6*6], name='reshape') - full1 = fully_connected(flat, 512, scope='full1') - drop1 = tf.nn.dropout(full1, pkeep, name='drop1') - full2 = fully_connected(drop1, 512, scope='full2') - drop2 = tf.nn.dropout(full2, pkeep, name='drop2') - - with 
tf.variable_scope('output') as scope: - - weights = tf.Variable(tf.random_normal([512, nlabels], mean=0.0, stddev=0.01), name='weights') - biases = tf.Variable(tf.constant(0.0, shape=[nlabels], dtype=tf.float32), name='biases') - output = tf.add(tf.matmul(drop2, weights), biases, name=scope.name) - return output - diff --git a/embedding-calculator/srcext/rude_carnie/preproc.py b/embedding-calculator/srcext/rude_carnie/preproc.py deleted file mode 100644 index c91620c4ce..0000000000 --- a/embedding-calculator/srcext/rude_carnie/preproc.py +++ /dev/null @@ -1,321 +0,0 @@ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from six.moves import xrange -from datetime import datetime -import os -import random -import sys -import threading -import numpy as np -import tensorflow as tf -import json - -RESIZE_HEIGHT = 256 -RESIZE_WIDTH = 256 - -tf.app.flags.DEFINE_string('fold_dir', '/home/dpressel/dev/work/AgeGenderDeepLearning/Folds/train_val_txt_files_per_fold/test_fold_is_0', - 'Fold directory') - -tf.app.flags.DEFINE_string('data_dir', '/data/xdata/age-gender/aligned', - 'Data directory') - - -tf.app.flags.DEFINE_string('output_dir', '/home/dpressel/dev/work/AgeGenderDeepLearning/Folds/tf/test_fold_is_0', - 'Output directory') - - -tf.app.flags.DEFINE_string('train_list', 'age_train.txt', - 'Training list') -tf.app.flags.DEFINE_string('valid_list', 'age_val.txt', - 'Test list') - -tf.app.flags.DEFINE_integer('train_shards', 10, - 'Number of shards in training TFRecord files.') -tf.app.flags.DEFINE_integer('valid_shards', 2, - 'Number of shards in validation TFRecord files.') - -tf.app.flags.DEFINE_integer('num_threads', 2, - 'Number of threads to preprocess the images.') - - -FLAGS = tf.app.flags.FLAGS - -def _int64_feature(value): - """Wrapper for inserting int64 features into Example proto.""" - if not isinstance(value, list): - value = [value] - return tf.train.Feature(int64_list=tf.train.Int64List(value=value)) - -def _bytes_feature(value): - """Wrapper for inserting bytes features into Example proto.""" - return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value])) - -def _convert_to_example(filename, image_buffer, label, height, width): - """Build an Example proto for an example. - Args: - filename: string, path to an image file, e.g., '/path/to/example.JPG' - image_buffer: string, JPEG encoding of RGB image - label: integer, identifier for the ground truth for the network - height: integer, image height in pixels - width: integer, image width in pixels - Returns: - Example proto - """ - - example = tf.train.Example(features=tf.train.Features(feature={ - 'image/class/label': _int64_feature(label), - 'image/filename': _bytes_feature(str.encode(os.path.basename(filename))), - 'image/encoded': _bytes_feature(image_buffer), - 'image/height': _int64_feature(height), - 'image/width': _int64_feature(width) - })) - return example - -class ImageCoder(object): - """Helper class that provides TensorFlow image coding utilities.""" - - def __init__(self): - # Create a single Session to run all image coding calls. - self._sess = tf.Session() - - # Initializes function that converts PNG to JPEG data. - self._png_data = tf.placeholder(dtype=tf.string) - image = tf.image.decode_png(self._png_data, channels=3) - self._png_to_jpeg = tf.image.encode_jpeg(image, format='rgb', quality=100) - - # Initializes function that decodes RGB JPEG data. 
- self._decode_jpeg_data = tf.placeholder(dtype=tf.string) - self._decode_jpeg = tf.image.decode_jpeg(self._decode_jpeg_data, channels=3) - cropped = tf.image.resize_images(self._decode_jpeg, [RESIZE_HEIGHT, RESIZE_WIDTH]) - cropped = tf.cast(cropped, tf.uint8) - self._recoded = tf.image.encode_jpeg(cropped, format='rgb', quality=100) - - def png_to_jpeg(self, image_data): - return self._sess.run(self._png_to_jpeg, - feed_dict={self._png_data: image_data}) - - def resample_jpeg(self, image_data): - image = self._sess.run(self._recoded, #self._decode_jpeg, - feed_dict={self._decode_jpeg_data: image_data}) - - return image - - -def _is_png(filename): - """Determine if a file contains a PNG format image. - Args: - filename: string, path of the image file. - Returns: - boolean indicating if the image is a PNG. - """ - return '.png' in filename - -def _process_image(filename, coder): - """Process a single image file. - Args: - filename: string, path to an image file e.g., '/path/to/example.JPG'. - coder: instance of ImageCoder to provide TensorFlow image coding utils. - Returns: - image_buffer: string, JPEG encoding of RGB image. - height: integer, image height in pixels. - width: integer, image width in pixels. - """ - # Read the image file. - with tf.gfile.FastGFile(filename, 'rb') as f: - image_data = f.read() - - # Convert any PNG to JPEG's for consistency. - if _is_png(filename): - print('Converting PNG to JPEG for %s' % filename) - image_data = coder.png_to_jpeg(image_data) - - # Decode the RGB JPEG. - image = coder.resample_jpeg(image_data) - return image, RESIZE_HEIGHT, RESIZE_WIDTH - -def _process_image_files_batch(coder, thread_index, ranges, name, filenames, - labels, num_shards): - """Processes and saves list of images as TFRecord in 1 thread. - Args: - coder: instance of ImageCoder to provide TensorFlow image coding utils. - thread_index: integer, unique batch to run index is within [0, len(ranges)). - ranges: list of pairs of integers specifying ranges of each batches to - analyze in parallel. - name: string, unique identifier specifying the data set - filenames: list of strings; each string is a path to an image file - labels: list of integer; each integer identifies the ground truth - num_shards: integer number of shards for this data set. - """ - # Each thread produces N shards where N = int(num_shards / num_threads). - # For instance, if num_shards = 128, and the num_threads = 2, then the first - # thread would produce shards [0, 64). - num_threads = len(ranges) - assert not num_shards % num_threads - num_shards_per_batch = int(num_shards / num_threads) - - shard_ranges = np.linspace(ranges[thread_index][0], - ranges[thread_index][1], - num_shards_per_batch + 1).astype(int) - num_files_in_thread = ranges[thread_index][1] - ranges[thread_index][0] - - counter = 0 - for s in xrange(num_shards_per_batch): - # Generate a sharded version of the file name, e.g. 
'train-00002-of-00010' - shard = thread_index * num_shards_per_batch + s - output_filename = '%s-%.5d-of-%.5d' % (name, shard, num_shards) - output_file = os.path.join(FLAGS.output_dir, output_filename) - writer = tf.python_io.TFRecordWriter(output_file) - - shard_counter = 0 - files_in_shard = np.arange(shard_ranges[s], shard_ranges[s + 1], dtype=int) - for i in files_in_shard: - filename = filenames[i] - label = int(labels[i]) - - image_buffer, height, width = _process_image(filename, coder) - - example = _convert_to_example(filename, image_buffer, label, - height, width) - writer.write(example.SerializeToString()) - shard_counter += 1 - counter += 1 - - if not counter % 1000: - print('%s [thread %d]: Processed %d of %d images in thread batch.' % - (datetime.now(), thread_index, counter, num_files_in_thread)) - sys.stdout.flush() - - writer.close() - print('%s [thread %d]: Wrote %d images to %s' % - (datetime.now(), thread_index, shard_counter, output_file)) - sys.stdout.flush() - shard_counter = 0 - print('%s [thread %d]: Wrote %d images to %d shards.' % - (datetime.now(), thread_index, counter, num_files_in_thread)) - sys.stdout.flush() - -def _process_image_files(name, filenames, labels, num_shards): - """Process and save list of images as TFRecord of Example protos. - Args: - name: string, unique identifier specifying the data set - filenames: list of strings; each string is a path to an image file - labels: list of integer; each integer identifies the ground truth - num_shards: integer number of shards for this data set. - """ - assert len(filenames) == len(labels) - - # Break all images into batches with a [ranges[i][0], ranges[i][1]]. - spacing = np.linspace(0, len(filenames), FLAGS.num_threads + 1).astype(np.int) - ranges = [] - threads = [] - for i in xrange(len(spacing) - 1): - ranges.append([spacing[i], spacing[i+1]]) - - # Launch a thread for each batch. - print('Launching %d threads for spacings: %s' % (FLAGS.num_threads, ranges)) - sys.stdout.flush() - - # Create a mechanism for monitoring when all threads are finished. - coord = tf.train.Coordinator() - - coder = ImageCoder() - - threads = [] - for thread_index in xrange(len(ranges)): - args = (coder, thread_index, ranges, name, filenames, labels, num_shards) - t = threading.Thread(target=_process_image_files_batch, args=args) - t.start() - threads.append(t) - - # Wait for all the threads to terminate. - coord.join(threads) - print('%s: Finished writing all %d images in data set.' % - (datetime.now(), len(filenames))) - sys.stdout.flush() - -def _find_image_files(list_file, data_dir): - print('Determining list of input files and labels from %s.' % list_file) - files_labels = [l.strip().split(' ') for l in tf.gfile.FastGFile( - list_file, 'r').readlines()] - - labels = [] - filenames = [] - - # Leave label index 0 empty as a background class. - label_index = 1 - - # Construct the list of JPEG files and labels. - for path, label in files_labels: - jpeg_file_path = '%s/%s' % (data_dir, path) - if os.path.exists(jpeg_file_path): - filenames.append(jpeg_file_path) - labels.append(label) - - unique_labels = set(labels) - # Shuffle the ordering of all image files in order to guarantee - # random ordering of the images with respect to label in the - # saved TFRecord files. Make the randomization repeatable. 
- shuffled_index = list(range(len(filenames))) - random.seed(12345) - random.shuffle(shuffled_index) - - filenames = [filenames[i] for i in shuffled_index] - labels = [labels[i] for i in shuffled_index] - - print('Found %d JPEG files across %d labels inside %s.' % - (len(filenames), len(unique_labels), data_dir)) - return filenames, labels - - -def _process_dataset(name, filename, directory, num_shards): - """Process a complete data set and save it as a TFRecord. - Args: - name: string, unique identifier specifying the data set. - filename: string, path to the list file of image paths and labels. - directory: string, root path to the data set. - num_shards: integer number of shards for this data set. - """ - filenames, labels = _find_image_files(filename, directory) - _process_image_files(name, filenames, labels, num_shards) - unique_labels = set(labels) - return len(labels), unique_labels - -def main(unused_argv): - assert not FLAGS.train_shards % FLAGS.num_threads, ( - 'Please make the FLAGS.num_threads commensurate with FLAGS.train_shards') - assert not FLAGS.valid_shards % FLAGS.num_threads, ( - 'Please make the FLAGS.num_threads commensurate with ' - 'FLAGS.valid_shards') - print('Saving results to %s' % FLAGS.output_dir) - - if not os.path.exists(FLAGS.output_dir): - print('creating %s' % FLAGS.output_dir) - os.makedirs(FLAGS.output_dir) - - # Run it! - valid, valid_outcomes = _process_dataset('validation', '%s/%s' % (FLAGS.fold_dir, FLAGS.valid_list), FLAGS.data_dir, - FLAGS.valid_shards) - train, train_outcomes = _process_dataset('train', '%s/%s' % (FLAGS.fold_dir, FLAGS.train_list), FLAGS.data_dir, - FLAGS.train_shards) - - if len(valid_outcomes) != len(valid_outcomes | train_outcomes): - print('Warning: training labels unattested in validation data [%s]' % (', '.join((valid_outcomes | train_outcomes) - valid_outcomes))) - - output_file = os.path.join(FLAGS.output_dir, 'md.json') - - - md = { 'num_valid_shards': FLAGS.valid_shards, - 'num_train_shards': FLAGS.train_shards, - 'valid_counts': valid, - 'train_counts': train, - 'timestamp': str(datetime.now()), - 'nlabels': len(train_outcomes) } - with open(output_file, 'w') as f: - json.dump(md, f) - - -if __name__ == '__main__': - tf.app.run() - diff --git a/embedding-calculator/srcext/rude_carnie/train.py b/embedding-calculator/srcext/rude_carnie/train.py deleted file mode 100644 index 3bd8213576..0000000000 --- a/embedding-calculator/srcext/rude_carnie/train.py +++ /dev/null @@ -1,192 +0,0 @@ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from six.moves import xrange -from datetime import datetime -import time -import os -import numpy as np -import tensorflow as tf -from .data import distorted_inputs -from .model import select_model -import json -import re - - -LAMBDA = 0.01 -MOM = 0.9 -tf.app.flags.DEFINE_string('pre_checkpoint_path', '', - """If specified, restore this pretrained model """ - """before beginning any training.""") - -tf.app.flags.DEFINE_string('train_dir', '/home/dpressel/dev/work/AgeGenderDeepLearning/Folds/tf/test_fold_is_0', - 'Training directory') - -tf.app.flags.DEFINE_boolean('log_device_placement', False, - """Whether to log device placement.""") - -tf.app.flags.DEFINE_integer('num_preprocess_threads', 4, - 'Number of preprocessing threads') - -tf.app.flags.DEFINE_string('optim', 'Momentum', - 'Optimizer') - -tf.app.flags.DEFINE_integer('image_size', 227, - 'Image size') - -tf.app.flags.DEFINE_float('eta', 0.01, - 'Learning rate') -
-tf.app.flags.DEFINE_float('pdrop', 0., - 'Dropout probability') - -tf.app.flags.DEFINE_integer('max_steps', 40000, - 'Number of iterations') - -tf.app.flags.DEFINE_integer('steps_per_decay', 10000, - 'Number of steps before learning rate decay') -tf.app.flags.DEFINE_float('eta_decay_rate', 0.1, - 'Learning rate decay') - -tf.app.flags.DEFINE_integer('epochs', -1, - 'Number of epochs') - -tf.app.flags.DEFINE_integer('batch_size', 128, - 'Batch size') - -tf.app.flags.DEFINE_string('checkpoint', 'checkpoint', - 'Checkpoint name') - -tf.app.flags.DEFINE_string('model_type', 'default', - 'Type of convnet') - -tf.app.flags.DEFINE_string('pre_model', - '',#'./inception_v3.ckpt', - 'checkpoint file') -FLAGS = tf.app.flags.FLAGS - -# Every 5k steps cut learning rate in half -def exponential_staircase_decay(at_step=10000, decay_rate=0.1): - - print('decay [%f] every [%d] steps' % (decay_rate, at_step)) - def _decay(lr, global_step): - return tf.train.exponential_decay(lr, global_step, - at_step, decay_rate, staircase=True) - return _decay - -def optimizer(optim, eta, loss_fn, at_step, decay_rate): - global_step = tf.Variable(0, trainable=False) - optz = optim - if optim == 'Adadelta': - optz = lambda lr: tf.train.AdadeltaOptimizer(lr, 0.95, 1e-6) - lr_decay_fn = None - elif optim == 'Momentum': - optz = lambda lr: tf.train.MomentumOptimizer(lr, MOM) - lr_decay_fn = exponential_staircase_decay(at_step, decay_rate) - - return tf.contrib.layers.optimize_loss(loss_fn, global_step, eta, optz, clip_gradients=4., learning_rate_decay_fn=lr_decay_fn) - -def loss(logits, labels): - labels = tf.cast(labels, tf.int32) - cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits( - logits=logits, labels=labels, name='cross_entropy_per_example') - cross_entropy_mean = tf.reduce_mean(cross_entropy, name='cross_entropy') - tf.add_to_collection('losses', cross_entropy_mean) - losses = tf.get_collection('losses') - regularization_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES) - total_loss = cross_entropy_mean + LAMBDA * sum(regularization_losses) - tf.summary.scalar('tl (raw)', total_loss) - #total_loss = tf.add_n(losses + regularization_losses, name='total_loss') - loss_averages = tf.train.ExponentialMovingAverage(0.9, name='avg') - loss_averages_op = loss_averages.apply(losses + [total_loss]) - for l in losses + [total_loss]: - tf.summary.scalar(l.op.name + ' (raw)', l) - tf.summary.scalar(l.op.name, loss_averages.average(l)) - with tf.control_dependencies([loss_averages_op]): - total_loss = tf.identity(total_loss) - return total_loss - -def main(argv=None): - with tf.Graph().as_default(): - - model_fn = select_model(FLAGS.model_type) - # Open the metadata file and figure out nlabels, and size of epoch - input_file = os.path.join(FLAGS.train_dir, 'md.json') - print(input_file) - with open(input_file, 'r') as f: - md = json.load(f) - - images, labels, _ = distorted_inputs(FLAGS.train_dir, FLAGS.batch_size, FLAGS.image_size, FLAGS.num_preprocess_threads) - logits = model_fn(md['nlabels'], images, 1-FLAGS.pdrop, True) - total_loss = loss(logits, labels) - - train_op = optimizer(FLAGS.optim, FLAGS.eta, total_loss, FLAGS.steps_per_decay, FLAGS.eta_decay_rate) - saver = tf.train.Saver(tf.global_variables()) - summary_op = tf.summary.merge_all() - - sess = tf.Session(config=tf.ConfigProto( - log_device_placement=FLAGS.log_device_placement)) - - tf.global_variables_initializer().run(session=sess) - - # This is total hackland, it only works to fine-tune iv3 - if FLAGS.pre_model: - inception_variables = 
tf.get_collection( - tf.GraphKeys.VARIABLES, scope="InceptionV3") - restorer = tf.train.Saver(inception_variables) - restorer.restore(sess, FLAGS.pre_model) - - if FLAGS.pre_checkpoint_path: - if tf.gfile.Exists(FLAGS.pre_checkpoint_path): - print('Trying to restore checkpoint from %s' % FLAGS.pre_checkpoint_path) - restorer = tf.train.Saver() - restorer.restore(sess, tf.train.latest_checkpoint(FLAGS.pre_checkpoint_path)) - print('%s: Pre-trained model restored from %s' % - (datetime.now(), FLAGS.pre_checkpoint_path)) - - - run_dir = '%s/run-%d' % (FLAGS.train_dir, os.getpid()) - - checkpoint_path = '%s/%s' % (run_dir, FLAGS.checkpoint) - if not tf.gfile.Exists(run_dir): - print('Creating %s' % run_dir) - tf.gfile.MakeDirs(run_dir) - - tf.train.write_graph(sess.graph_def, run_dir, 'model.pb', as_text=True) - - tf.train.start_queue_runners(sess=sess) - - - summary_writer = tf.summary.FileWriter(run_dir, sess.graph) - steps_per_train_epoch = int(md['train_counts'] / FLAGS.batch_size) - num_steps = FLAGS.max_steps if FLAGS.epochs < 1 else FLAGS.epochs * steps_per_train_epoch - print('Requested number of steps [%d]' % num_steps) - - - for step in xrange(num_steps): - start_time = time.time() - _, loss_value = sess.run([train_op, total_loss]) - duration = time.time() - start_time - - assert not np.isnan(loss_value), 'Model diverged with loss = NaN' - - if step % 10 == 0: - num_examples_per_step = FLAGS.batch_size - examples_per_sec = num_examples_per_step / duration - sec_per_batch = float(duration) - - format_str = ('%s: step %d, loss = %.3f (%.1f examples/sec; %.3f ' 'sec/batch)') - print(format_str % (datetime.now(), step, loss_value, - examples_per_sec, sec_per_batch)) - - # Summaries are only evaluated every 100 steps - if step % 100 == 0: - summary_str = sess.run(summary_op) - summary_writer.add_summary(summary_str, step) - - if step % 1000 == 0 or (step + 1) == num_steps: - saver.save(sess, checkpoint_path, global_step=step) - -if __name__ == '__main__': - tf.app.run() diff --git a/embedding-calculator/srcext/rude_carnie/utils.py b/embedding-calculator/srcext/rude_carnie/utils.py deleted file mode 100644 index c55c3684be..0000000000 --- a/embedding-calculator/srcext/rude_carnie/utils.py +++ /dev/null @@ -1,178 +0,0 @@ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import six.moves -from datetime import datetime -import sys -import math -import time -from .data import inputs, standardize_image -import numpy as np -import tensorflow as tf -from detect import * -import re - -RESIZE_AOI = 256 -RESIZE_FINAL = 227 - -# Modified from here: -# http://stackoverflow.com/questions/3160699/python-progress-bar#3160819 -class ProgressBar(object): - DEFAULT = 'Progress: %(bar)s %(percent)3d%%' - FULL = '%(bar)s %(current)d/%(total)d (%(percent)3d%%) %(remaining)d to go' - - def __init__(self, total, width=40, fmt=DEFAULT, symbol='='): - assert len(symbol) == 1 - - self.total = total - self.width = width - self.symbol = symbol - self.fmt = re.sub(r'(?P<name>%\(.+?\))d', - r'\g<name>%dd' % len(str(total)), fmt) - - self.current = 0 - - def update(self, step=1): - self.current += step - percent = self.current / float(self.total) - size = int(self.width * percent) - remaining = self.total - self.current - bar = '[' + self.symbol * size + ' ' * (self.width - size) + ']' - - args = { - 'total': self.total, - 'bar': bar, - 'current': self.current, - 'percent': percent * 100, - 'remaining': remaining - } - six.print_('\r' + self.fmt % args, end='') - - def
done(self): - self.current = self.total - self.update(step=0) - print('') - -# Read image files -class ImageCoder(object): - - def __init__(self): - # Create a single Session to run all image coding calls. - config = tf.ConfigProto(allow_soft_placement=True) - self._sess = tf.Session(config=config) - - # Initializes function that converts PNG to JPEG data. - self._png_data = tf.placeholder(dtype=tf.string) - image = tf.image.decode_png(self._png_data, channels=3) - self._png_to_jpeg = tf.image.encode_jpeg(image, format='rgb', quality=100) - - # Initializes function that decodes RGB JPEG data. - self._decode_jpeg_data = tf.placeholder(dtype=tf.string) - self._decode_jpeg = tf.image.decode_jpeg(self._decode_jpeg_data, channels=3) - self.crop = tf.image.resize_images(self._decode_jpeg, (RESIZE_AOI, RESIZE_AOI)) - - def png_to_jpeg(self, image_data): - return self._sess.run(self._png_to_jpeg, - feed_dict={self._png_data: image_data}) - - def decode_jpeg(self, image_data): - image = self._sess.run(self.crop, #self._decode_jpeg, - feed_dict={self._decode_jpeg_data: image_data}) - - assert len(image.shape) == 3 - assert image.shape[2] == 3 - return image - - -def _is_png(filename): - """Determine if a file contains a PNG format image. - Args: - filename: string, path of the image file. - Returns: - boolean indicating if the image is a PNG. - """ - return '.png' in filename - -def make_multi_image_batch(filenames, coder): - """Process a multi-image batch, each with a single-look - Args: - filenames: list of paths - coder: instance of ImageCoder to provide TensorFlow image coding utils. - Returns: - image_buffer: string, JPEG encoding of RGB image. - """ - - images = [] - - for filename in filenames: - with tf.gfile.FastGFile(filename, 'rb') as f: - image_data = f.read() - # Convert any PNG to JPEG's for consistency. - if _is_png(filename): - print('Converting PNG to JPEG for %s' % filename) - image_data = coder.png_to_jpeg(image_data) - - image = coder.decode_jpeg(image_data) - - crop = tf.image.resize_images(image, (RESIZE_FINAL, RESIZE_FINAL)) - image = standardize_image(crop) - images.append(image) - image_batch = tf.stack(images) - return image_batch - -def make_multi_crop_batch(filename, coder): - """Process a single image file. - Args: - filename: string, path to an image file e.g., '/path/to/example.JPG'. - coder: instance of ImageCoder to provide TensorFlow image coding utils. - Returns: - image_buffer: string, JPEG encoding of RGB image. - """ - # Read the image file. - with tf.gfile.FastGFile(filename, 'rb') as f: - image_data = f.read() - - # Convert any PNG to JPEG's for consistency. 
- if _is_png(filename): - print('Converting PNG to JPEG for %s' % filename) - image_data = coder.png_to_jpeg(image_data) - - image = coder.decode_jpeg(image_data) - - crops = [] - print('Running multi-cropped image') - h = image.shape[0] - w = image.shape[1] - hl = h - RESIZE_FINAL - wl = w - RESIZE_FINAL - - crop = tf.image.resize_images(image, (RESIZE_FINAL, RESIZE_FINAL)) - crops.append(standardize_image(crop)) - crops.append(standardize_image(tf.image.flip_left_right(crop))) - - corners = [ (0, 0), (0, wl), (hl, 0), (hl, wl), (int(hl/2), int(wl/2))] - for corner in corners: - ch, cw = corner - cropped = tf.image.crop_to_bounding_box(image, ch, cw, RESIZE_FINAL, RESIZE_FINAL) - crops.append(standardize_image(cropped)) - flipped = standardize_image(tf.image.flip_left_right(cropped)) - crops.append(flipped) - - image_batch = tf.stack(crops) - return image_batch - - - -def face_detection_model(model_type, model_path): - model_type_lc = model_type.lower() - if model_type_lc == 'yolo_tiny': - from yolodetect import PersonDetectorYOLOTiny - return PersonDetectorYOLOTiny(model_path) - elif model_type_lc == 'yolo_face': - from yolodetect import FaceDetectorYOLO - return FaceDetectorYOLO(model_path) - elif model_type_lc == 'dlib': - from dlibdetect import FaceDetectorDlib - return FaceDetectorDlib(model_path) - return ObjectDetectorCascadeOpenCV(model_path) diff --git a/embedding-calculator/srcext/rude_carnie/yolodetect.py b/embedding-calculator/srcext/rude_carnie/yolodetect.py deleted file mode 100644 index 81d16fd633..0000000000 --- a/embedding-calculator/srcext/rude_carnie/yolodetect.py +++ /dev/null @@ -1,283 +0,0 @@ -from detect import ObjectDetector - -import numpy as np -import tensorflow as tf -import cv2 - -class YOLOBase(ObjectDetector): - def __init__(self): - pass - - def _conv_layer(self, idx, inputs, filters, size, stride): - channels = inputs.get_shape()[3] - weight = tf.Variable(tf.truncated_normal([size, size, int(channels), filters], stddev=0.1)) - biases = tf.Variable(tf.constant(0.1, shape=[filters])) - - pad_size = size // 2 - pad_mat = np.array([[0, 0], [pad_size, pad_size], [pad_size, pad_size], [0, 0]]) - inputs_pad = tf.pad(inputs, pad_mat) - - conv = tf.nn.conv2d(inputs_pad, weight, strides=[1, stride, stride, 1], padding='VALID', - name=str(idx) + '_conv') - conv_biased = tf.add(conv, biases, name=str(idx) + '_conv_biased') - return tf.maximum(self.alpha * conv_biased, conv_biased, name=str(idx) + '_leaky_relu') - - def _pooling_layer(self, idx, inputs, size, stride): - return tf.nn.max_pool(inputs, ksize=[1, size, size, 1], strides=[1, stride, stride, 1], padding='SAME', - name=str(idx) + '_pool') - - def _fc_layer(self, idx, inputs, hiddens, flat=False, linear=False): - input_shape = inputs.get_shape().as_list() - if flat: - dim = input_shape[1] * input_shape[2] * input_shape[3] - inputs_transposed = tf.transpose(inputs, (0, 3, 1, 2)) - inputs_processed = tf.reshape(inputs_transposed, [-1, dim]) - else: - dim = input_shape[1] - inputs_processed = inputs - weight = tf.Variable(tf.truncated_normal([dim, hiddens], stddev=0.1)) - biases = tf.Variable(tf.constant(0.1, shape=[hiddens])) - if linear: return tf.add(tf.matmul(inputs_processed, weight), biases, name=str(idx) + '_fc') - ip = tf.add(tf.matmul(inputs_processed, weight), biases) - return tf.maximum(self.alpha * ip, ip, name=str(idx) + '_fc') - - def _init_base_model(self): - self.x = tf.placeholder('float32', [None, 448, 448, 3]) - conv_1 = self._conv_layer(1, self.x, 16, 3, 1) - pool_2 =
self._pooling_layer(2, conv_1, 2, 2) - conv_3 = self._conv_layer(3, pool_2, 32, 3, 1) - pool_4 = self._pooling_layer(4, conv_3, 2, 2) - conv_5 = self._conv_layer(5, pool_4, 64, 3, 1) - pool_6 = self._pooling_layer(6, conv_5, 2, 2) - conv_7 = self._conv_layer(7, pool_6, 128, 3, 1) - pool_8 = self._pooling_layer(8, conv_7, 2, 2) - conv_9 = self._conv_layer(9, pool_8, 256, 3, 1) - pool_10 = self._pooling_layer(10, conv_9, 2, 2) - conv_11 = self._conv_layer(11, pool_10, 512, 3, 1) - pool_12 = self._pooling_layer(12, conv_11, 2, 2) - conv_13 = self._conv_layer(13, pool_12, 1024, 3, 1) - conv_14 = self._conv_layer(14, conv_13, 1024, 3, 1) - conv_15 = self._conv_layer(15, conv_14, 1024, 3, 1) - fc_16 = self._fc_layer(16, conv_15, 256, flat=True, linear=False) - return self._fc_layer(17, fc_16, 4096, flat=False, linear=False) - - def _iou(self, box1, box2): - tb = min(box1[0] + 0.5 * box1[2], box2[0] + 0.5 * box2[2]) - max(box1[0] - 0.5 * box1[2], - box2[0] - 0.5 * box2[2]) - lr = min(box1[1] + 0.5 * box1[3], box2[1] + 0.5 * box2[3]) - max(box1[1] - 0.5 * box1[3], - box2[1] - 0.5 * box2[3]) - if tb < 0 or lr < 0: - intersection = 0 - else: - intersection = tb * lr - return intersection / (box1[2] * box1[3] + box2[2] * box2[3] - intersection) - - def sub_image(self, name, img, x, y, w, h): - half_w = w // 2 - half_h = h // 2 - upper_cut = [y + half_h, x + half_w] - lower_cut = [y - half_h, x - half_w]; - roi_color = img[lower_cut[0]:upper_cut[0], lower_cut[1]:upper_cut[1]] - cv2.imwrite(name, roi_color) - return name - - def draw_rect(self, img, x, y, w, h): - half_w = w // 2 - half_h = h // 2 - upper_cut = [y + half_h, x + half_w] - lower_cut = [y - half_h, x - half_w]; - cv2.rectangle(img, (lower_cut[1], lower_cut[0]), (upper_cut[1], upper_cut[0]), (0, 255, 0), 2) - - def run(self, filename): - img = cv2.imread(filename) - self.h_img, self.w_img, _ = img.shape - img_resized = cv2.resize(img, (448, 448)) - img_RGB = cv2.cvtColor(img_resized, cv2.COLOR_BGR2RGB) - img_resized_np = np.asarray(img_RGB) - inputs = np.zeros((1, 448, 448, 3), dtype='float32') - inputs[0] = (img_resized_np / 255.0) * 2.0 - 1.0 - in_dict = {self.x: inputs} - net_output = self.sess.run(self.fc_19, feed_dict=in_dict) - faces = self.interpret_output(net_output[0]) - images = [] - for i, (x, y, w, h, p) in enumerate(faces): - images.append(self.sub_image('%s/%s-%d.jpg' % (self.tgtdir, self.basename, i + 1), img, x, y, w, h)) - - print('%d faces detected' % len(images)) - - for (x, y, w, h, p) in faces: - print('Face found [%d, %d, %d, %d] (%.2f)' % (x, y, w, h, p)); - self.draw_rect(img, x, y, w, h) - # Fix in case nothing found in the image - outfile = '%s/%s.jpg' % (self.tgtdir, self.basename) - cv2.imwrite(outfile, img) - return images, outfile - - def __init__(self, model_name, basename, tgtdir, alpha, threshold, iou_threshold): - self.alpha = alpha - self.threshold = threshold - self.iou_threshold = iou_threshold - self.basename = basename - self.tgtdir = tgtdir - self.load_model(model_name) - -class PersonDetectorYOLOTiny(YOLOBase): - def __init__(self, model_name, basename='frontal-face', tgtdir='.', alpha=0.1, threshold=0.2, iou_threshold=0.5): - self.alpha = alpha - self.threshold = threshold - self.iou_threshold = iou_threshold - self.basename = basename - self.tgtdir = tgtdir - self.load_model(model_name) - - def load_model(self, model_name): - g = tf.Graph() - - with g.as_default(): - fc_17 = self._init_base_model() - # skip dropout_18 - self.fc_19 = self._fc_layer(19, fc_17, 1470, flat=False, linear=True) - 
self.sess = tf.Session(graph=g) - self.sess.run(tf.global_variables_initializer()) - self.saver = tf.train.Saver() - self.saver.restore(self.sess, model_name) - - def interpret_output(self, output): - probs = np.zeros((7, 7, 2, 20)) - class_probs = np.reshape(output[0:980], (7, 7, 20)) - scales = np.reshape(output[980:1078], (7, 7, 2)) - boxes = np.reshape(output[1078:], (7, 7, 2, 4)) - offset = np.transpose(np.reshape(np.array([np.arange(7)] * 14), (2, 7, 7)), (1, 2, 0)) - - boxes[:, :, :, 0] += offset - boxes[:, :, :, 1] += np.transpose(offset, (1, 0, 2)) - boxes[:, :, :, 0:2] = boxes[:, :, :, 0:2] / 7.0 - boxes[:, :, :, 2] = np.multiply(boxes[:, :, :, 2], boxes[:, :, :, 2]) - boxes[:, :, :, 3] = np.multiply(boxes[:, :, :, 3], boxes[:, :, :, 3]) - - boxes[:, :, :, 0] *= self.w_img - boxes[:, :, :, 1] *= self.h_img - boxes[:, :, :, 2] *= self.w_img - boxes[:, :, :, 3] *= self.h_img - - for i in range(2): - for j in range(20): - probs[:, :, i, j] = np.multiply(class_probs[:, :, j], scales[:, :, i]) - - filter_mat_probs = np.array(probs >= self.threshold, dtype='bool') - filter_mat_boxes = np.nonzero(filter_mat_probs) - boxes_filtered = boxes[filter_mat_boxes[0], filter_mat_boxes[1], filter_mat_boxes[2]] - probs_filtered = probs[filter_mat_probs] - classes_num_filtered = np.argmax(filter_mat_probs, axis=3)[ - filter_mat_boxes[0], filter_mat_boxes[1], filter_mat_boxes[2]] - - argsort = np.array(np.argsort(probs_filtered))[::-1] - boxes_filtered = boxes_filtered[argsort] - probs_filtered = probs_filtered[argsort] - classes_num_filtered = classes_num_filtered[argsort] - - for i in range(len(boxes_filtered)): - if probs_filtered[i] == 0: - continue - - for j in range(i + 1, len(boxes_filtered)): - if self._iou(boxes_filtered[i], boxes_filtered[j]) > self.iou_threshold: - probs_filtered[j] = 0.0 - - filter_iou = np.array(probs_filtered > 0.0, dtype='bool') - boxes_filtered = boxes_filtered[filter_iou] - probs_filtered = probs_filtered[filter_iou] - classes_num_filtered = classes_num_filtered[filter_iou] - - result = [] - for i in range(len(boxes_filtered)): - if classes_num_filtered[i] == 14: - result.append([int(boxes_filtered[i][0]), - int(boxes_filtered[i][1]), - int(boxes_filtered[i][2]), - int(boxes_filtered[i][3]), - probs_filtered[i]]) - - return result - -# This model doesnt seem to work particularly well on data I have tried -class FaceDetectorYOLO(YOLOBase): - def __init__(self, model_name, basename='frontal-face', tgtdir='.', alpha=0.1, threshold=0.2, iou_threshold=0.5): - self.alpha = alpha - self.threshold = threshold - self.iou_threshold = iou_threshold - self.basename = basename - self.tgtdir = tgtdir - self.load_model(model_name) - - def load_model(self, model_name): - g = tf.Graph() - - with g.as_default(): - fc_17 = self._init_base_model() - # skip dropout_18 - self.fc_19 = self._fc_layer(19, fc_17, 1331, flat=False, linear=True) - self.sess = tf.Session(graph=g) - self.sess.run(tf.global_variables_initializer()) - self.saver = tf.train.Saver() - self.saver.restore(self.sess, model_name) - - def interpret_output(self, output): - prob_range = [0, 11 * 11 * 1] - scales_range = [prob_range[1], prob_range[1] + 11 * 11 * 2] - boxes_range = [scales_range[1], scales_range[1] + 11 * 11 * 2 * 4] - - probs = np.zeros((11, 11, 2, 1)) - class_probs = np.reshape(output[0:prob_range[1]], (11, 11, 1)) - scales = np.reshape(output[scales_range[0]:scales_range[1]], (11, 11, 2)) - boxes = np.reshape(output[boxes_range[0]:], (11, 11, 2, 4)) - offset = 
np.transpose(np.reshape(np.array([np.arange(11)] * (2 * 11)), (2, 11, 11)), (1, 2, 0)) - - boxes[:, :, :, 0] += offset - boxes[:, :, :, 1] += np.transpose(offset, (1, 0, 2)) - boxes[:, :, :, 0:2] = boxes[:, :, :, 0:2] / float(11) - boxes[:, :, :, 2] = np.multiply(boxes[:, :, :, 2], boxes[:, :, :, 2]) - boxes[:, :, :, 3] = np.multiply(boxes[:, :, :, 3], boxes[:, :, :, 3]) - - boxes[:, :, :, 0] *= self.w_img - boxes[:, :, :, 1] *= self.h_img - boxes[:, :, :, 2] *= self.w_img - boxes[:, :, :, 3] *= self.h_img - - for i in range(2): - probs[:, :, i, 0] = np.multiply(class_probs[:, :, 0], scales[:, :, i]) - - filter_mat_probs = np.array(probs >= self.threshold, dtype='bool') - filter_mat_boxes = np.nonzero(filter_mat_probs) - boxes_filtered = boxes[filter_mat_boxes[0], filter_mat_boxes[1], filter_mat_boxes[2]] - probs_filtered = probs[filter_mat_probs] - classes_num_filtered = np.argmax(filter_mat_probs, axis=3)[ - filter_mat_boxes[0], filter_mat_boxes[1], filter_mat_boxes[2]] - - argsort = np.array(np.argsort(probs_filtered))[::-1] - boxes_filtered = boxes_filtered[argsort] - probs_filtered = probs_filtered[argsort] - classes_num_filtered = classes_num_filtered[argsort] - - for i in range(len(boxes_filtered)): - if probs_filtered[i] == 0: continue - for j in range(i + 1, len(boxes_filtered)): - if self._iou(boxes_filtered[i], boxes_filtered[j]) > self.iou_threshold: - probs_filtered[j] = 0.0 - - filter_iou = np.array(probs_filtered > 0.0, dtype='bool') - boxes_filtered = boxes_filtered[filter_iou] - probs_filtered = probs_filtered[filter_iou] - classes_num_filtered = classes_num_filtered[filter_iou] - - result = [] - for i in range(len(boxes_filtered)): - result.append([int(boxes_filtered[i][0]), - int(boxes_filtered[i][1]), - int(boxes_filtered[i][2]), - int(boxes_filtered[i][3]), - probs_filtered[i]]) - - return result - -
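Editor's note on the removed code (appendix, not part of the patch): model.py's select_model() and get_checkpoint() were the glue that let guess.py and train.py rebuild a TF1 graph and restore the newest checkpoint before running softmax inference. A minimal sketch of that usage pattern, assuming the removed model.py is importable, tensorflow~=1.15 as pinned by the deleted plugin, and a hypothetical checkpoint directory 'ckpt_dir' holding a model trained with 8 labels on 227x227 crops:

import numpy as np
import tensorflow as tf
from model import select_model, get_checkpoint  # the removed module, if kept around

NLABELS, IMAGE_SIZE = 8, 227  # illustrative assumptions, not values from the patch
with tf.Graph().as_default():
    sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
    images = tf.placeholder(tf.float32, [None, IMAGE_SIZE, IMAGE_SIZE, 3])
    model_fn = select_model('inception')            # resolves to inception_v3 above
    logits = model_fn(NLABELS, images, 1.0, False)  # pkeep=1.0, is_training=False
    softmax_output = tf.nn.softmax(logits)
    ckpt_path, _ = get_checkpoint('ckpt_dir')       # newest checkpoint in the directory
    tf.train.Saver().restore(sess, ckpt_path)
    batch = np.zeros((1, IMAGE_SIZE, IMAGE_SIZE, 3), dtype=np.float32)  # stand-in input
    probs = sess.run(softmax_output, feed_dict={images: batch})

The same Graph/Session/Saver wiring appears, with batching and multi-crop averaging layered on top, throughout the deleted guess.py, train.py, and yolodetect.py.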