# resume_photo_maker.py

import onnxruntime
import cv2
import numpy as np
import argparse

# The common resume photo size is 35mmx45mm
RESUME_PHOTO_W = 350
RESUME_PHOTO_H = 450


# modified from https://github.com/opencv/opencv_zoo/blob/main/models/face_detection_yunet/yunet.py
class YuNet:
    """Small wrapper around ``cv2.FaceDetectorYN`` (the YuNet face detector).

    The wrapper remembers every construction parameter so the underlying
    detector can be rebuilt when the backend/target changes.

    Args:
        modelPath: Path of the detector model file handed to OpenCV.
        inputSize: ``(width, height)`` of the images that will be passed to
            ``infer``.
        confThreshold: Score threshold forwarded as ``score_threshold``.
        nmsThreshold: Threshold forwarded as ``nms_threshold``.
        topK: Value forwarded as ``top_k``.
        backendId: Value forwarded as ``backend_id``.
        targetId: Value forwarded as ``target_id``.
    """

    def __init__(
        self,
        modelPath,
        inputSize=[320, 320],
        confThreshold=0.6,
        nmsThreshold=0.3,
        topK=5000,
        backendId=0,
        targetId=0,
    ):
        self._modelPath = modelPath
        # The list default is copied into a tuple here and never mutated.
        self._inputSize = tuple(inputSize)  # (w, h)
        self._confThreshold = confThreshold
        self._nmsThreshold = nmsThreshold
        self._topK = topK
        self._backendId = backendId
        self._targetId = targetId

        self._model = self._createModel()

    def _createModel(self):
        """Build a ``cv2.FaceDetectorYN`` from the currently stored settings."""
        return cv2.FaceDetectorYN.create(
            model=self._modelPath,
            config="",
            input_size=self._inputSize,
            score_threshold=self._confThreshold,
            nms_threshold=self._nmsThreshold,
            top_k=self._topK,
            backend_id=self._backendId,
            target_id=self._targetId,
        )

    @property
    def name(self):
        """Return the class name of this wrapper."""
        return self.__class__.__name__

    def setBackendAndTarget(self, backendId, targetId):
        """Switch backend/target and rebuild the detector with stored settings."""
        self._backendId = backendId
        self._targetId = targetId
        self._model = self._createModel()

    def setInputSize(self, input_size):
        """Set the ``(width, height)`` of the images passed to ``infer``.

        The size is also stored on the wrapper so that a later
        ``setBackendAndTarget`` rebuilds the detector with the current size
        instead of silently falling back to the constructor value.
        """
        self._inputSize = tuple(input_size)
        self._model.setInputSize(self._inputSize)

    def infer(self, image):
        """Run detection on ``image`` and return one row per detected face.

        Returns:
            The second element of ``FaceDetectorYN.detect``'s result. When
            that element is ``None`` (nothing detected) an empty
            ``(0, 15)`` float32 array is returned instead, so callers can
            always iterate over the result. The 15-column row layout follows
            the OpenCV ``FaceDetectorYN.detect`` documentation.
        """
        # Forward
        faces = self._model.detect(image)
        detections = faces[1]
        if detections is None:
            return np.empty((0, 15), dtype=np.float32)
        return detections


class ONNXModel:
    """Run an ONNX model through onnxruntime on a fixed-size 3-channel input.

    Args:
        model_path: Path of the ONNX file loaded into an ``InferenceSession``.
        input_w: Width the input image is resized to.
        input_h: Height the input image is resized to.
    """

    def __init__(self, model_path, input_w, input_h):
        self.model = onnxruntime.InferenceSession(model_path)
        self.input_w, self.input_h = input_w, input_h

    def preprocess(self, rgb, mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)):
        """Resize, scale to [0, 1], normalize per channel and add a batch axis.

        Args:
            rgb: Image array with channels on the last axis.
            mean: Per-channel mean subtracted after dividing by 255.
            std: Per-channel divisor applied after the mean subtraction.

        Returns:
            float32 array of shape ``(1, 3, input_h, input_w)``.
        """
        resized = cv2.resize(rgb, (self.input_w, self.input_h))
        # HWC -> CHW, as float32
        chw = np.array(resized).transpose(2, 0, 1).astype("float32")

        normalized = np.empty_like(chw)
        for channel, plane in enumerate(chw):
            normalized[channel] = (plane / 255 - mean[channel]) / std[channel]

        # prepend the batch dimension
        batch_shape = (1, 3, self.input_h, self.input_w)
        return normalized.reshape(batch_shape).astype("float32")

    def forward(self, image):
        """Preprocess ``image`` and return the model's ``argmax_0.tmp_0`` output list."""
        feeds = {"x": self.preprocess(image)}
        return self.model.run(["argmax_0.tmp_0"], feeds)


def parse_args(argv=None):
    """Parse and validate the command-line options.

    Args:
        argv: Optional list of argument strings. ``None`` (the default) makes
            argparse read ``sys.argv[1:]``, exactly as before.

    Returns:
        The parsed namespace with ``background_color`` (one or three integers
        in 0..255) and ``image`` (input image path).

    Exits through ``parser.error`` (``SystemExit``) when the background color
    has an unsupported number of components or a component outside 0..255, so
    a bad color is reported before any image or model is loaded.
    """
    parser = argparse.ArgumentParser(description="Resume Photo Maker")
    parser.add_argument(
        "--background_color",
        "-bg",
        nargs="+",
        type=int,
        default=(255, 255, 255),
        help="Set the background color RGB values "
        "(three integers in 0-255, or a single gray level).",
    )
    parser.add_argument(
        "--image", "-i", type=str, default="images/elon.jpg", help="Input image path."
    )

    args = parser.parse_args(argv)

    color = args.background_color
    # A single value still works downstream (it is applied to every channel);
    # any other count cannot be assigned to a 3-channel pixel.
    if len(color) not in (1, 3):
        parser.error("--background_color expects 1 or 3 integers")
    if any(not 0 <= component <= 255 for component in color):
        parser.error("--background_color components must be in the range 0-255")

    return args


def background_to_bgr(color):
    """Convert the RGB ``--background_color`` value into an OpenCV BGR tuple.

    Args:
        color: Sequence of one or three integers. A single value is used for
            all three channels.

    Returns:
        A 3-tuple ``(b, g, r)`` of ints.

    Raises:
        ValueError: If ``color`` has neither one nor three components.
    """
    channels = [int(component) for component in color]
    if len(channels) == 1:
        channels = channels * 3
    if len(channels) != 3:
        raise ValueError(
            f"background color needs 1 or 3 components, got {len(channels)}"
        )
    # The images handled here come from cv2.imread, i.e. they are BGR.
    return tuple(reversed(channels))


def face_crop_transform(det, aspect_ratio):
    """Build the affine map from the source image to an upright face crop.

    The map rotates the image so the line through both eyes becomes
    horizontal and translates it so the crop's top-left corner lands on
    ``(0, 0)``. Working directly with the normalized eye vector avoids any
    radians/degrees conversion.

    Args:
        det: One detection row: ``x, y, w, h`` of the face box followed by the
            right-eye and left-eye coordinates (further values are ignored).
        aspect_ratio: Desired crop ``width / height``.

    Returns:
        ``(matrix, (crop_w, crop_h))`` where ``matrix`` is a 2x3 float64
        forward transform suitable for ``cv2.warpAffine`` and the size is the
        crop size in pixels.
    """
    det = np.asarray(det, dtype=np.float64)
    x, y, w, h = det[:4]
    right_eye, left_eye = det[4:6], det[6:8]

    # Per the OpenCV FaceDetectorYN docs the "right eye" is the subject's
    # right eye, which sits on the left side of the image, so this vector
    # points towards +x for an upright face.
    eye_dx, eye_dy = left_eye - right_eye
    eye_dist = np.hypot(eye_dx, eye_dy)
    if eye_dist > 0:
        cos_a, sin_a = eye_dx / eye_dist, eye_dy / eye_dist
    else:
        # Coincident eye landmarks: no usable direction, so do not rotate.
        cos_a, sin_a = 1.0, 0.0

    # Leave extra room above (hair) and below (neck/shoulders) the face box.
    up_length = int(h / 4)
    down_length = int(h / 3)
    crop_h = int(h) + up_length + down_length
    crop_w = int(crop_h * aspect_ratio)

    # The face-box centre is the rotation pivot; it is placed horizontally
    # centred in the crop and `up_length` below the crop's top edge.
    src_cx, src_cy = x + w / 2, y + h / 2
    dst_cx, dst_cy = crop_w / 2, up_length + h / 2

    matrix = np.array(
        [
            [cos_a, sin_a, dst_cx - (cos_a * src_cx + sin_a * src_cy)],
            [-sin_a, cos_a, dst_cy - (-sin_a * src_cx + cos_a * src_cy)],
        ],
        dtype=np.float64,
    )
    return matrix, (crop_w, crop_h)


def main():
    """Detect faces in the input image and write one resume photo per face.

    Each photo is an eye-levelled crop around the face whose background
    (segmentation mask value 0) is replaced by the requested color, resized
    to ``RESUME_PHOTO_W`` x ``RESUME_PHOTO_H`` and saved as
    ``masked_resume_photo_<idx>.jpg``.

    Raises:
        FileNotFoundError: If the input image cannot be read.
        ValueError: If the background color does not have 1 or 3 components.
    """
    args = parse_args()
    background_bgr = background_to_bgr(args.background_color)

    bgr = cv2.imread(args.image)
    if bgr is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError(f"Could not read image: {args.image}")
    h, w = bgr.shape[:2]

    # Initialize models
    face_detector = YuNet("models/face_detection_yunet_2023mar.onnx")
    face_detector.setInputSize([w, h])
    human_segmentor = ONNXModel(
        "models/human_pp_humansegv2_lite_192x192_inference_model.onnx", 192, 192
    )

    # yunet uses opencv bgr image format
    detections = face_detector.infer(bgr)
    if detections is None or len(detections) == 0:
        print(f"No face detected in {args.image}")
        return

    aspect_ratio = RESUME_PHOTO_W / RESUME_PHOTO_H
    for idx, det in enumerate(detections):
        matrix, (crop_w, crop_h) = face_crop_transform(det, aspect_ratio)
        if crop_w < 1 or crop_h < 1:
            # Degenerate face box; nothing sensible to crop.
            continue

        # Rotate and crop in one step. Areas that fall outside the source
        # image are filled with the background color instead of wrapping
        # around or shrinking the crop.
        resume_photo = cv2.warpAffine(
            bgr,
            matrix,
            (crop_w, crop_h),
            borderMode=cv2.BORDER_CONSTANT,
            borderValue=background_bgr,
        )

        rgb = cv2.cvtColor(resume_photo, cv2.COLOR_BGR2RGB)
        mask = human_segmentor.forward(rgb)
        # First model output, channel axis moved last before resizing.
        mask = mask[0].transpose(1, 2, 0)
        mask = cv2.resize(
            mask.astype(np.uint8), (resume_photo.shape[1], resume_photo.shape[0])
        )

        # Mask value 0 is treated as background.
        resume_photo[mask == 0] = background_bgr

        resume_photo = cv2.resize(resume_photo, (RESUME_PHOTO_W, RESUME_PHOTO_H))
        cv2.imwrite(f"masked_resume_photo_{idx}.jpg", resume_photo)


if __name__ == "__main__":
    main()