diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..08a9202 --- /dev/null +++ b/.gitignore @@ -0,0 +1,165 @@ +# Ignore weights +weights/*.onnx + +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/latest/usage/project/#working-with-version-control +.pdm.toml +.pdm-python +.pdm-build/ + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. 
+#.idea/
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..1a0fed5
--- /dev/null
+++ b/README.md
@@ -0,0 +1,86 @@
+# Face Re-Identification with SCRFD and ArcFace
+
+![Downloads](https://img.shields.io/github/downloads/yakhyo/face-reidentification/total)
+
+This repository implements face re-identification using SCRFD for face detection and ArcFace for face recognition. It supports inference from a webcam or a video source.
+
+## Features
+
+- **Face Detection**: Utilizes [Sample and Computation Redistribution for Efficient Face Detection](https://arxiv.org/abs/2105.04714) (SCRFD) for efficient and accurate face detection.
+- **Face Recognition**: Employs [ArcFace: Additive Angular Margin Loss for Deep Face Recognition](https://arxiv.org/abs/1801.07698) for robust face recognition.
+- **Real-Time Inference**: Supports both webcam and video file input for real-time processing.
+
+Project folder structure:
+
+```
+├── assets/
+│   ├── demo.mp4
+│   └── in_video.mp4
+├── faces/
+│   ├── face1.jpg
+│   ├── face2.jpg
+│   └── ...
+├── models/
+│   ├── __init__.py
+│   ├── scrfd.py
+│   └── arcface.py
+├── utils/
+│   └── helpers.py
+├── weights/
+│   ├── det_10g.onnx
+│   └── w600k_r50.onnx
+├── main.py
+├── README.md
+└── requirements.txt
+```
+
+## Installation
+
+1. Clone the repository:
+
+```bash
+git clone https://github.com/yakhyo/face-reidentification.git
+cd face-reidentification
+```
+
+2. Install the required dependencies:
+
+```bash
+pip install -r requirements.txt
+```
+
+3. Place the SCRFD and ArcFace ONNX weights (`det_10g.onnx` and `w600k_r50.onnx`) in the `weights/` directory, or point `--det-weight` and `--rec-weight` to their location.
+
+## Usage
+
+```bash
+python main.py --source assets/in_video.mp4
+```
+
+`main.py` arguments:
+
+```
+usage: main.py [-h] [--det-weight DET_WEIGHT] [--rec-weight REC_WEIGHT] [--similarity-thresh SIMILARITY_THRESH] [--confidence-thresh CONFIDENCE_THRESH]
+               [--faces-dir FACES_DIR] [--source SOURCE] [--max-num MAX_NUM] [--log-level LOG_LEVEL]
+
+Face Detection-and-Recognition
+
+options:
+  -h, --help            show this help message and exit
+  --det-weight DET_WEIGHT
+                        Path to detection model
+  --rec-weight REC_WEIGHT
+                        Path to recognition model
+  --similarity-thresh SIMILARITY_THRESH
+                        Similarity threshold between faces
+  --confidence-thresh CONFIDENCE_THRESH
+                        Confidence threshold for face detection
+  --faces-dir FACES_DIR
+                        Path to the directory of reference face images
+  --source SOURCE       Video file or webcam index, e.g. 0 for webcam
+  --max-num MAX_NUM     Maximum number of face detections per frame
+  --log-level LOG_LEVEL
+                        Logging level
+```
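+
+To run on a webcam instead of a video file, pass the device index as the source (a purely numeric `--source` is interpreted as a camera index in `main.py`):
+
+```bash
+python main.py --source 0 --faces-dir ./faces
+```
+
+Press `q` to stop the stream; the annotated output is also written to `friends_out.mp4`.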
+
+## Reference
+
+1. https://github.com/deepinsight/insightface/tree/master/detection/scrfd
+2. https://github.com/deepinsight/insightface/tree/master/recognition/arcface_torch
diff --git a/assets/demo.mp4 b/assets/demo.mp4
new file mode 100644
index 0000000..55496e4
Binary files /dev/null and b/assets/demo.mp4 differ
diff --git a/assets/in_video.mp4 b/assets/in_video.mp4
new file mode 100644
index 0000000..9cfddb4
Binary files /dev/null and b/assets/in_video.mp4 differ
diff --git a/faces/Chandler.png b/faces/Chandler.png
new file mode 100644
index 0000000..ac3ba38
Binary files /dev/null and b/faces/Chandler.png differ
diff --git a/faces/Joey.png b/faces/Joey.png
new file mode 100644
index 0000000..2443aeb
Binary files /dev/null and b/faces/Joey.png differ
diff --git a/faces/Monica.png b/faces/Monica.png
new file mode 100644
index 0000000..eaa92b0
Binary files /dev/null and b/faces/Monica.png differ
diff --git a/faces/Phoebe.png b/faces/Phoebe.png
new file mode 100644
index 0000000..c7bfcd9
Binary files /dev/null and b/faces/Phoebe.png differ
diff --git a/faces/Rachel.png b/faces/Rachel.png
new file mode 100644
index 0000000..ed8026e
Binary files /dev/null and b/faces/Rachel.png differ
diff --git a/faces/Ross.png b/faces/Ross.png
new file mode 100644
index 0000000..e46d8b0
Binary files /dev/null and b/faces/Ross.png differ
diff --git a/main.py b/main.py
new file mode 100644
index 0000000..524d96b
--- /dev/null
+++ b/main.py
@@ -0,0 +1,204 @@
+import os
+import cv2
+import random
+import warnings
+import argparse
+import logging
+
+import numpy as np
+
+from models import SCRFD, ArcFaceONNX
+from utils.helpers import draw_fancy_bbox, compute_similarity
+from typing import List, Tuple
+
+
+warnings.filterwarnings("ignore")
+
+
+def parse_args():
+    parser = argparse.ArgumentParser(description="Face Detection-and-Recognition")
+    parser.add_argument(
+        "--det-weight",
+        type=str,
+        default="./weights/det_10g.onnx",
+        help="Path to detection model",
+    )
+    parser.add_argument(
+        "--rec-weight",
+        type=str,
+        default="./weights/w600k_r50.onnx",
+        help="Path to recognition model",
+    )
+    parser.add_argument(
+        "--similarity-thresh",
+        type=float,
+        default=0.4,
+        help="Similarity threshold between faces",
+    )
+    parser.add_argument(
+        "--confidence-thresh",
+        type=float,
+        default=0.5,
+        help="Confidence threshold for face detection",
+    )
+    parser.add_argument(
+        "--faces-dir",
+        type=str,
+        default="./faces",
+        help="Path to the directory of reference face images",
+    )
+    parser.add_argument(
+        "--source",
+        type=str,
+        default="0",
+        help="Video file or webcam index, e.g. 0 for webcam",
+    )
+    parser.add_argument(
+        "--max-num",
+        type=int,
+        default=10,
+        help="Maximum number of face detections per frame",
+    )
+    parser.add_argument("--log-level", type=str, default="INFO", help="Logging level")
+
+    return parser.parse_args()
+
+
+def setup_logging(level: str) -> None:
+    logging.basicConfig(
+        level=getattr(logging, level.upper(), logging.INFO),
+        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
+    )
+
+
+def build_targets(
+    detector, recognizer, params: argparse.Namespace
+) -> List[Tuple[np.ndarray, str]]:
+    """Builds targets using face detection and recognition.
+
+    Args:
+        detector (SCRFD): Face detector model.
+        recognizer (ArcFaceONNX): Face recognizer model.
+        params (argparse.Namespace): Runtime parameters; `faces_dir` points to the reference images.
+
+    Returns:
+        List[Tuple[np.ndarray, str]]: A list where each tuple contains a feature vector and the corresponding person name.
+    """
+    targets = []
+    for filename in os.listdir(params.faces_dir):
+        name = os.path.splitext(filename)[0]
+        image_path = os.path.join(params.faces_dir, filename)
+
+        image = cv2.imread(image_path)
+        if image is None:
+            logging.warning(f"Could not read {image_path}. Skipping...")
+            continue
+
+        bboxes, kpss = detector.detect(
+            image, input_size=(640, 640), thresh=params.confidence_thresh, max_num=1
+        )
+
+        if len(kpss) == 0:
+            logging.warning(f"No face detected in {image_path}. Skipping...")
+            continue
+
+        feature_vector = recognizer(image, kpss[0])
+        targets.append((feature_vector, name))
+
+    return targets
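+
+
+# Each reference image in --faces-dir yields one (embedding, name) target; the
+# file name without its extension (e.g. faces/Ross.png -> "Ross") becomes the
+# label drawn on matched faces.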
+ """ + targets = [] + for filename in os.listdir(params.faces_dir): + name = filename[:-4] + image_path = os.path.join(params.faces_dir, filename) + + image = cv2.imread(image_path) + bboxes, kpss = detector.detect( + image, input_size=(640, 640), thresh=params.confidence_thresh, max_num=1 + ) + + if len(kpss) == 0: + logging.warning(f"No face detected in {image_path}. Skipping...") + continue + + feature_vector = recognizer(image, kpss[0]) + targets.append((feature_vector, name)) + + return targets + + +def frame_processor( + frame: np.ndarray, + detector, + recognizer, + targets: List[Tuple[np.ndarray, str]], + colors: dict, + params, +) -> np.ndarray: + bboxes, kpss = detector.detect( + frame, + input_size=(640, 640), + thresh=params.confidence_thresh, + max_num=params.max_num, + ) + + for bbox, kps in zip(bboxes, kpss): + x1, y1, x2, y2, score = bbox.astype(np.int32) + embedding = recognizer(frame, kps) + + max_similarity = 0 + best_match_name = "Unknown" + for target, name in targets: + similarity = compute_similarity(target, embedding) + if similarity > max_similarity and similarity > params.similarity_thresh: + max_similarity = similarity + best_match_name = name + + if best_match_name != "Unknown": + color = colors[best_match_name] + draw_fancy_bbox( + frame, + bbox, + similarity=max_similarity, + name=best_match_name, + color=color, + ) + else: + cv2.rectangle(frame, (x1, y1), (x2, y2), (255, 0, 0), thickness=2) + + return frame + + +def main(params): + setup_logging(params.log_level) + + detector = SCRFD(params.det_weight) + recognizer = ArcFaceONNX(params.rec_weight) + + targets = build_targets(detector, recognizer, params) + colors = { + name: (random.randint(0, 256), random.randint(0, 256), random.randint(0, 256)) + for _, name in targets + } + + cap = cv2.VideoCapture(params.source) + if not cap.isOpened(): + raise Exception("Could not open video or webcam") + + width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)) + height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) + fps = cap.get(cv2.CAP_PROP_FPS) + + # Define the codec and create VideoWriter object + out = cv2.VideoWriter( + "friends_out.mp4", cv2.VideoWriter_fourcc(*"mp4v"), fps, (width, height) + ) + + while True: + ret, frame = cap.read() + + if not ret: + break + + frame = frame_processor( + frame=frame, + detector=detector, + recognizer=recognizer, + targets=targets, + colors=colors, + params=params, + ) + + out.write(frame) + cv2.imshow("Frame", frame) + + if cv2.waitKey(1) & 0xFF == ord("q"): + break + + cap.release() + out.release() + cv2.destroyAllWindows() + + +if __name__ == "__main__": + args = parse_args() + + if args.source.isdigit(): + args.source = int(args.source) + + main(args) diff --git a/models/__init__.py b/models/__init__.py new file mode 100644 index 0000000..635e655 --- /dev/null +++ b/models/__init__.py @@ -0,0 +1,2 @@ +from .arcface import ArcFaceONNX +from .scrfd import SCRFD diff --git a/models/arcface.py b/models/arcface.py new file mode 100644 index 0000000..f02e544 --- /dev/null +++ b/models/arcface.py @@ -0,0 +1,55 @@ +import cv2 +import numpy as np +import onnxruntime + +from utils.helpers import norm_crop_image + + +class ArcFaceONNX: + def __init__(self, model_path: str = None, session=None) -> None: + self.session = session + self.input_mean = 127.5 + self.input_std = 127.5 + self.taskname = "recognition" + + if session is None: + self.session = onnxruntime.InferenceSession( + model_path, + providers=["CUDAExecutionProvider", "CPUExecutionProvider"], + ) + input_cfg = 
diff --git a/models/arcface.py b/models/arcface.py
new file mode 100644
index 0000000..f02e544
--- /dev/null
+++ b/models/arcface.py
@@ -0,0 +1,55 @@
+import cv2
+import numpy as np
+import onnxruntime
+
+from utils.helpers import norm_crop_image
+
+
+class ArcFaceONNX:
+    def __init__(self, model_path: str = None, session=None) -> None:
+        self.session = session
+        self.input_mean = 127.5
+        self.input_std = 127.5
+        self.taskname = "recognition"
+
+        if session is None:
+            self.session = onnxruntime.InferenceSession(
+                model_path,
+                providers=["CUDAExecutionProvider", "CPUExecutionProvider"],
+            )
+
+        input_cfg = self.session.get_inputs()[0]
+        input_shape = input_cfg.shape
+
+        input_name = input_cfg.name
+        self.input_size = tuple(input_shape[2:4][::-1])
+        self.input_shape = input_shape
+
+        outputs = self.session.get_outputs()
+        output_names = [output.name for output in outputs]
+
+        self.input_name = input_name
+        self.output_names = output_names
+        assert len(self.output_names) == 1
+        self.output_shape = outputs[0].shape
+
+    def get_feat(self, images: np.ndarray) -> np.ndarray:
+        if not isinstance(images, list):
+            images = [images]
+
+        input_size = self.input_size
+        blob = cv2.dnn.blobFromImages(
+            images,
+            1.0 / self.input_std,
+            input_size,
+            (self.input_mean, self.input_mean, self.input_mean),
+            swapRB=True,
+        )
+        outputs = self.session.run(self.output_names, {self.input_name: blob})[0]
+        return outputs
+
+    def __call__(self, image, kps):
+        aligned_image = norm_crop_image(image, landmark=kps)
+        embedding = self.get_feat(aligned_image).flatten()
+        return embedding
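+
+
+if __name__ == "__main__":
+    # Minimal usage sketch (not part of the pipeline): run from the repo root
+    # as `python -m models.arcface`. Assumes the ONNX weights are present under
+    # ./weights and uses one of the bundled reference images.
+    from models.scrfd import SCRFD
+
+    detector = SCRFD(model_file="./weights/det_10g.onnx")
+    recognizer = ArcFaceONNX(model_path="./weights/w600k_r50.onnx")
+
+    image = cv2.imread("./faces/Ross.png")
+    bboxes, kpss = detector.detect(image, input_size=(640, 640), max_num=1)
+    if kpss is not None and len(kpss) > 0:
+        embedding = recognizer(image, kpss[0])
+        print(embedding.shape)  # e.g. (512,) for w600k_r50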
diff --git a/models/scrfd.py b/models/scrfd.py
new file mode 100644
index 0000000..12e7328
--- /dev/null
+++ b/models/scrfd.py
@@ -0,0 +1,258 @@
+import os
+import cv2
+import numpy as np
+import onnxruntime
+
+from utils.helpers import distance2bbox, distance2kps
+
+
+class SCRFD:
+    """
+    Title: "Sample and Computation Redistribution for Efficient Face Detection"
+    Paper: https://arxiv.org/abs/2105.04714
+    """
+
+    def __init__(self, model_file=None, session=None):
+        self.model_file = model_file
+        self.session = session
+        self.taskname = "detection"
+        self.batched = False
+
+        if self.session is None:
+            assert self.model_file is not None
+            assert os.path.exists(self.model_file)
+            self.session = onnxruntime.InferenceSession(
+                self.model_file,
+                providers=["CUDAExecutionProvider", "CPUExecutionProvider"],
+            )
+
+        self.center_cache = {}
+        self.nms_thresh = 0.4
+        self.det_thresh = 0.5
+        self._init_vars()
+
+    def _init_vars(self):
+        self.mean = 127.5
+        self.std = 128.0
+
+        input_cfg = self.session.get_inputs()[0]
+        input_shape = input_cfg.shape
+        if isinstance(input_shape[2], str):
+            self.input_size = None
+        else:
+            self.input_size = tuple(input_shape[2:4][::-1])
+
+        input_name = input_cfg.name
+        self.input_shape = input_shape
+
+        outputs = self.session.get_outputs()
+        if len(outputs[0].shape) == 3:
+            self.batched = True
+
+        output_names = [output.name for output in outputs]
+
+        self.input_name = input_name
+        self.output_names = output_names
+
+        self.use_kps = False
+        self._anchor_ratio = 1.0
+        self._num_anchors = 1
+        if len(outputs) == 6:
+            self.fmc = 3
+            self._feat_stride_fpn = [8, 16, 32]
+            self._num_anchors = 2
+        elif len(outputs) == 9:
+            self.fmc = 3
+            self._feat_stride_fpn = [8, 16, 32]
+            self._num_anchors = 2
+            self.use_kps = True
+        elif len(outputs) == 10:
+            self.fmc = 5
+            self._feat_stride_fpn = [8, 16, 32, 64, 128]
+            self._num_anchors = 1
+        elif len(outputs) == 15:
+            self.fmc = 5
+            self._feat_stride_fpn = [8, 16, 32, 64, 128]
+            self._num_anchors = 1
+            self.use_kps = True
+
+    def forward(self, image, threshold):
+        scores_list = []
+        bboxes_list = []
+        kpss_list = []
+        input_size = tuple(image.shape[0:2][::-1])
+
+        blob = cv2.dnn.blobFromImage(
+            image,
+            1.0 / self.std,
+            input_size,
+            (self.mean, self.mean, self.mean),
+            swapRB=True,
+        )
+        outputs = self.session.run(self.output_names, {self.input_name: blob})
+
+        input_height = blob.shape[2]
+        input_width = blob.shape[3]
+
+        fmc = self.fmc
+        for idx, stride in enumerate(self._feat_stride_fpn):
+            if self.batched:  # If the model supports a batch dim, take the first output
+                scores = outputs[idx][0]
+                bbox_preds = outputs[idx + fmc][0]
+                bbox_preds = bbox_preds * stride
+                if self.use_kps:
+                    kps_preds = outputs[idx + fmc * 2][0] * stride
+            else:  # If the model doesn't support batching, take the output as is
+                scores = outputs[idx]
+                bbox_preds = outputs[idx + fmc]
+                bbox_preds = bbox_preds * stride
+                if self.use_kps:
+                    kps_preds = outputs[idx + fmc * 2] * stride
+
+            height = input_height // stride
+            width = input_width // stride
+            key = (height, width, stride)
+            if key in self.center_cache:
+                anchor_centers = self.center_cache[key]
+            else:
+                anchor_centers = np.stack(np.mgrid[:height, :width][::-1], axis=-1).astype(np.float32)
+                anchor_centers = (anchor_centers * stride).reshape((-1, 2))
+                if self._num_anchors > 1:
+                    anchor_centers = np.stack([anchor_centers] * self._num_anchors, axis=1).reshape((-1, 2))
+                if len(self.center_cache) < 100:
+                    self.center_cache[key] = anchor_centers
+
+            pos_inds = np.where(scores >= threshold)[0]
+            bboxes = distance2bbox(anchor_centers, bbox_preds)
+            pos_scores = scores[pos_inds]
+            pos_bboxes = bboxes[pos_inds]
+            scores_list.append(pos_scores)
+            bboxes_list.append(pos_bboxes)
+            if self.use_kps:
+                kpss = distance2kps(anchor_centers, kps_preds)
+                kpss = kpss.reshape((kpss.shape[0], -1, 2))
+                pos_kpss = kpss[pos_inds]
+                kpss_list.append(pos_kpss)
+        return scores_list, bboxes_list, kpss_list
+
+    def detect(self, image, input_size=None, thresh=None, max_num=0, metric="default"):
+        assert input_size is not None or self.input_size is not None
+        input_size = self.input_size if input_size is None else input_size
+
+        im_ratio = float(image.shape[0]) / image.shape[1]
+        model_ratio = float(input_size[1]) / input_size[0]
+        if im_ratio > model_ratio:
+            new_height = input_size[1]
+            new_width = int(new_height / im_ratio)
+        else:
+            new_width = input_size[0]
+            new_height = int(new_width * im_ratio)
+
+        det_scale = float(new_height) / image.shape[0]
+        resized_image = cv2.resize(image, (new_width, new_height))
+        det_image = np.zeros((input_size[1], input_size[0], 3), dtype=np.uint8)
+        det_image[:new_height, :new_width, :] = resized_image
+        det_thresh = thresh if thresh is not None else self.det_thresh
+
+        scores_list, bboxes_list, kpss_list = self.forward(det_image, det_thresh)
+
+        scores = np.vstack(scores_list)
+        scores_ravel = scores.ravel()
+        order = scores_ravel.argsort()[::-1]
+        bboxes = np.vstack(bboxes_list) / det_scale
+
+        if self.use_kps:
+            kpss = np.vstack(kpss_list) / det_scale
+
+        pre_det = np.hstack((bboxes, scores)).astype(np.float32, copy=False)
+        pre_det = pre_det[order, :]
+        keep = self.nms(pre_det, thresh=self.nms_thresh)
+        det = pre_det[keep, :]
+        if self.use_kps:
+            kpss = kpss[order, :, :]
+            kpss = kpss[keep, :, :]
+        else:
+            kpss = None
+        if 0 < max_num < det.shape[0]:
+            area = (det[:, 2] - det[:, 0]) * (det[:, 3] - det[:, 1])
+            image_center = image.shape[0] // 2, image.shape[1] // 2
+            offsets = np.vstack(
+                [
+                    (det[:, 0] + det[:, 2]) / 2 - image_center[1],
+                    (det[:, 1] + det[:, 3]) / 2 - image_center[0],
+                ]
+            )
+            offset_dist_squared = np.sum(np.power(offsets, 2.0), 0)
+            if metric == "max":
+                values = area
+            else:
+                values = area - offset_dist_squared * 2.0  # some extra weight on the centering
+            bindex = np.argsort(values)[::-1]
+            bindex = bindex[0:max_num]
+            det = det[bindex, :]
+            if kpss is not None:
+                kpss = kpss[bindex, :]
+        return det, kpss
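+
+    # detect() returns:
+    #   det:  an (N, 5) array of [x1, y1, x2, y2, score] rows, one per kept face
+    #   kpss: an (N, 5, 2) array of facial landmarks, or None when the model
+    #         exposes no keypoint outputs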
+
+    def nms(self, dets, thresh):
+        x1 = dets[:, 0]
+        y1 = dets[:, 1]
+        x2 = dets[:, 2]
+        y2 = dets[:, 3]
+        scores = dets[:, 4]
+
+        areas = (x2 - x1 + 1) * (y2 - y1 + 1)
+        order = scores.argsort()[::-1]
+
+        keep = []
+        while order.size > 0:
+            i = order[0]
+            keep.append(i)
+            xx1 = np.maximum(x1[i], x1[order[1:]])
+            yy1 = np.maximum(y1[i], y1[order[1:]])
+            xx2 = np.minimum(x2[i], x2[order[1:]])
+            yy2 = np.minimum(y2[i], y2[order[1:]])
+
+            w = np.maximum(0.0, xx2 - xx1 + 1)
+            h = np.maximum(0.0, yy2 - yy1 + 1)
+            inter = w * h
+            ovr = inter / (areas[i] + areas[order[1:]] - inter)
+
+            indices = np.where(ovr <= thresh)[0]
+            order = order[indices + 1]
+
+        return keep
+
+
+if __name__ == "__main__":
+    detector = SCRFD(model_file="./weights/det_10g.onnx")
+    cap = cv2.VideoCapture(0)
+
+    while True:
+        ret, frame = cap.read()
+        if not ret:
+            break
+
+        boxes_list, points_list = detector.detect(frame, input_size=(640, 640))
+
+        if points_list is None:
+            points_list = [None] * len(boxes_list)
+
+        for boxes, points in zip(boxes_list, points_list):
+            x1, y1, x2, y2, score = boxes.astype(np.int32)
+            cv2.rectangle(frame, (x1, y1), (x2, y2), (255, 0, 0), 2)
+
+            if points is not None:
+                for point in points:
+                    point = point.astype(np.int32)
+                    cv2.circle(frame, tuple(point), 1, (0, 0, 255), 2)
+
+        cv2.imshow("FaceDetection", frame)
+        if cv2.waitKey(1) & 0xFF == ord("q"):
+            break
+
+    cap.release()
+    cv2.destroyAllWindows()
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..ad04978
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,6 @@
+onnx==1.16.1
+onnxruntime==1.18.1
+# For CUDA inference install onnxruntime-gpu instead of onnxruntime (the two
+# packages conflict when installed together):
+# onnxruntime-gpu==1.18.0
+opencv-python==4.9.0.80
+numpy==1.24.4
+scikit-image==0.23.2
diff --git a/utils/helpers.py b/utils/helpers.py
new file mode 100644
index 0000000..82937b7
--- /dev/null
+++ b/utils/helpers.py
@@ -0,0 +1,151 @@
+import cv2
+import numpy as np
+from skimage.transform import SimilarityTransform
+
+
+reference_alignment = np.array(
+    [[
+        [38.2946, 51.6963],
+        [73.5318, 51.5014],
+        [56.0252, 71.7366],
+        [41.5493, 92.3655],
+        [70.7299, 92.2041]
+    ]],
+    dtype=np.float32
+)
+
+
+def estimate_norm(landmark, image_size=112):
+    """Estimate the normalization transformation matrix for facial landmarks.
+
+    Args:
+        landmark (np.ndarray): Array of shape (5, 2) with the facial landmark coordinates.
+        image_size (int, optional): The size of the output image. Default is 112.
+
+    Returns:
+        tuple: A tuple containing:
+            - min_matrix (np.ndarray): The 2x3 transformation matrix aligning the landmarks.
+            - min_index (int): The index of the reference alignment that gave the minimum error.
+    """
+    assert landmark.shape == (5, 2)
+    min_matrix = []
+    min_index = []
+    min_error = float('inf')
+
+    landmark_transform = np.insert(landmark, 2, values=np.ones(5), axis=1)
+    transform = SimilarityTransform()
+
+    if image_size == 112:
+        alignment = reference_alignment
+    else:
+        alignment = float(image_size) / 112 * reference_alignment
+
+    for i in np.arange(alignment.shape[0]):
+        transform.estimate(landmark, alignment[i])
+        matrix = transform.params[0:2, :]
+        results = np.dot(matrix, landmark_transform.T)
+        results = results.T
+        error = np.sum(np.sqrt(np.sum((results - alignment[i]) ** 2, axis=1)))
+        if error < min_error:
+            min_error = error
+            min_matrix = matrix
+            min_index = i
+    return min_matrix, min_index
+
+
+def norm_crop_image(image, landmark, image_size=112, mode='arcface'):
+    M, _ = estimate_norm(landmark, image_size)
+    warped = cv2.warpAffine(image, M, (image_size, image_size), borderValue=0.0)
+    return warped
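+
+
+# norm_crop_image() returns an (image_size, image_size, 3) crop in which the
+# five detected landmarks are mapped onto the canonical ArcFace template
+# (reference_alignment above); this aligned crop is what ArcFaceONNX expects.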
+ """ + assert landmark.shape == (5, 2) + min_matrix = [] + min_index = [] + min_error = float('inf') + + landmark_transform = np.insert(landmark, 2, values=np.ones(5), axis=1) + transform = SimilarityTransform() + + if image_size == 112: + alignment = reference_alignment + else: + alignment = float(image_size) / 112 * reference_alignment + + for i in np.arange(alignment.shape[0]): + transform.estimate(landmark, alignment[i]) + matrix = transform.params[0:2, :] + results = np.dot(matrix, landmark_transform.T) + results = results.T + error = np.sum(np.sqrt(np.sum((results - alignment[i]) ** 2, axis=1))) + if error < min_error: + min_error = error + min_matrix = matrix + min_index = i + return min_matrix, min_index + + +def norm_crop_image(image, landmark, image_size=112, mode='arcface'): + M, pose_index = estimate_norm(landmark, image_size) + warped = cv2.warpAffine(image, M, (image_size, image_size), borderValue=0.0) + return warped + + +def distance2bbox(points, distance, max_shape=None): + """Decode distance prediction to bounding box. + + Args: + points (Tensor): Shape (n, 2), [x, y]. + distance (Tensor): Distance from the given point to 4 + boundaries (left, top, right, bottom). + max_shape (tuple): Shape of the image. + + Returns: + Tensor: Decoded bounding boxes with shape (n, 4). + """ + x1 = points[:, 0] - distance[:, 0] + y1 = points[:, 1] - distance[:, 1] + x2 = points[:, 0] + distance[:, 2] + y2 = points[:, 1] + distance[:, 3] + if max_shape is not None: + x1 = x1.clamp(min=0, max=max_shape[1]) + y1 = y1.clamp(min=0, max=max_shape[0]) + x2 = x2.clamp(min=0, max=max_shape[1]) + y2 = y2.clamp(min=0, max=max_shape[0]) + return np.stack([x1, y1, x2, y2], axis=-1) + + +def distance2kps(points, distance, max_shape=None): + """Decode distance prediction to keypoints. + + Args: + points (Tensor): Shape (n, 2), [x, y]. + distance (Tensor): Distance from the given point to 4 + boundaries (left, top, right, bottom). + max_shape (tuple): Shape of the image. + + Returns: + Tensor: Decoded keypoints with shape (n, 2k). + """ + preds = [] + for i in range(0, distance.shape[1], 2): + px = points[:, i % 2] + distance[:, i] + py = points[:, i % 2 + 1] + distance[:, i + 1] + if max_shape is not None: + px = px.clamp(min=0, max=max_shape[1]) + py = py.clamp(min=0, max=max_shape[0]) + preds.append(px) + preds.append(py) + return np.stack(preds, axis=-1) + + +def compute_similarity(feat1: np.ndarray, feat2: np.ndarray) -> np.float32: + """Computing Similarity between two faces. + + Args: + feat1 (np.ndarray): Face features. + feat2 (np.ndarray): Face features. + + Returns: + np.float32: Cosine similarity between face features. 
+ """ + feat1 = feat1.ravel() + feat2 = feat2.ravel() + similarity = np.dot(feat1, feat2) / (np.linalg.norm(feat1) * np.linalg.norm(feat2)) + return similarity + + +def draw_fancy_bbox(frame, bbox, similarity, name, color): + x1, y1, x2, y2, score = bbox.astype(np.int32) + + cv2.putText( + frame, + f"{name}: {similarity:.2f}", + org=(x1, y1-10), + fontFace=cv2.FONT_HERSHEY_COMPLEX_SMALL, + fontScale=1, + color=color, + thickness=1 + ) + + # Draw bounding box + cv2.rectangle(frame, (x1, y1), (x2, y2), color, thickness=2) + + # Draw similarity bar + rect_x_start = x2 + 10 + rect_x_end = rect_x_start + 10 + rect_y_end = y2 + rect_height = int(similarity * (y2 - y1)) + rect_y_start = rect_y_end - rect_height # Rectangle starts from bottom and goes upward + + # Draw the filled rectangle + cv2.rectangle(frame, (rect_x_start, rect_y_start), (rect_x_end, rect_y_end), color, cv2.FILLED) + diff --git a/weights/.gitkeep b/weights/.gitkeep new file mode 100644 index 0000000..e69de29