Added subsampling

Upgraded gradio
C0untFloyd committed Jul 14, 2024
1 parent 4236385 commit 126fd69
Showing 16 changed files with 109 additions and 50 deletions.
6 changes: 6 additions & 0 deletions README.md
@@ -59,6 +59,12 @@ Additional commandline arguments are currently unsupported and settings should b

### Changelog

**14.07.2024** v4.1.0

- Added subsample upscaling to increase swap resolution
- Upgraded gradio


**22.04.2024** v3.9.0

- Bugfix: Face detection bounding box corrupt values at weird angles
2 changes: 1 addition & 1 deletion requirements.txt
@@ -1,7 +1,7 @@
--extra-index-url https://download.pytorch.org/whl/cu118

numpy==1.26.4
-gradio==4.32.1
+gradio==4.38.1
opencv-python==4.9.0.80
onnx==1.16.0
insightface==0.7.3
75 changes: 61 additions & 14 deletions roop/ProcessMgr.py
@@ -6,7 +6,7 @@
from enum import Enum
from roop.ProcessOptions import ProcessOptions

-from roop.face_util import get_first_face, get_all_faces, rotate_image_180, rotate_anticlockwise, rotate_clockwise, clamp_cut_values
+from roop.face_util import get_first_face, get_all_faces, rotate_anticlockwise, rotate_clockwise, clamp_cut_values
from roop.utilities import compute_cosine_distance, get_device, str_to_class
import roop.vr_util as vr

@@ -20,6 +20,7 @@
import roop.globals



# Poor man's enum to be able to compare to int
class eNoFaceAction():
USE_ORIGINAL_FRAME = 0
@@ -44,6 +45,7 @@ def pick_queue(queue: Queue[str], queue_per_future: int) -> List[str]:
return queues



class ProcessMgr():
input_face_datas = []
target_face_datas = []
@@ -317,11 +319,6 @@ def update_progress(self, progress: Any = None) -> None:
self.progress_gradio((progress.n, self.total_frames), desc='Processing', total=self.total_frames, unit='frames')


-# https://github.com/deepinsight/insightface#third-party-re-implementation-of-arcface
-# https://github.com/deepinsight/insightface/blob/master/alignment/coordinate_reg/image_infer.py
-# https://github.com/deepinsight/insightface/issues/1350
-# https://github.com/linghu8812/tensorrt_inference


def process_frame(self, frame:Frame):
if len(self.input_face_datas) < 1 and not self.options.show_face_masking:
@@ -541,17 +538,30 @@ def process_face(self,face_index, target_face:Face, frame:Frame):

# img = vr.GetPerspective(frame, 90, theta, phi, 1280, 1280) # Generate perspective image

-fake_frame = None
-aligned_img, M = align_crop(frame, target_face.kps, 128)
+""" Code ported/adapted from FaceFusion, which borrowed the idea from Rope:
+the aligned face crop is subsampled into model-sized slices, each slice is swapped
+individually, and the slices are reassembled at the desired output resolution.
+This works around the swapper's native resolution limit without using enhancers.
+"""
+model_output_size = 128
+subsample_size = self.options.subsample_size
+subsample_total = subsample_size // model_output_size
+aligned_img, M = align_crop(frame, target_face.kps, subsample_size)
+
+fake_frame = aligned_img
swap_frame = aligned_img
target_face.matrix = M

for p in self.processors:
if p.type == 'swap':
if inputface is not None:
+swap_result_frames = []
+subsample_frames = self.implode_pixel_boost(aligned_img, model_output_size, subsample_total)
+for sliced_frame in subsample_frames:
for _ in range(0,self.options.num_swap_steps):
-swap_frame = p.Run(inputface, target_face, swap_frame)
-fake_frame = swap_frame
+sliced_frame = self.prepare_crop_frame(sliced_frame)
+sliced_frame = p.Run(inputface, target_face, sliced_frame)
+sliced_frame = self.normalize_swap_frame(sliced_frame)
+swap_result_frames.append(sliced_frame)
+fake_frame = self.explode_pixel_boost(swap_result_frames, model_output_size, subsample_total, subsample_size)
scale_factor = 0.0
elif p.type == 'mask':
fake_frame = self.process_mask(p, aligned_img, fake_frame)
@@ -560,8 +570,8 @@ def process_face(self,face_index, target_face:Face, frame:Frame):

upscale = 512
orig_width = fake_frame.shape[1]

-fake_frame = cv2.resize(fake_frame, (upscale, upscale), cv2.INTER_CUBIC)
+if orig_width != upscale:
+fake_frame = cv2.resize(fake_frame, (upscale, upscale), cv2.INTER_CUBIC)
mask_offsets = (0,0,0,0,1,20) if inputface is None else inputface.mask_offsets


@@ -673,6 +683,43 @@ def blur_area(self, img_matte, num_erosion_iterations, blur_amount):
return cv2.GaussianBlur(img_matte, blur_size, 0)


def prepare_crop_frame(self, swap_frame):
model_type = 'inswapper'
model_mean = [0.0, 0.0, 0.0]
model_standard_deviation = [1.0, 1.0, 1.0]

if model_type == 'ghost':
swap_frame = swap_frame[:, :, ::-1] / 127.5 - 1
else:
swap_frame = swap_frame[:, :, ::-1] / 255.0
swap_frame = (swap_frame - model_mean) / model_standard_deviation
swap_frame = swap_frame.transpose(2, 0, 1)
swap_frame = np.expand_dims(swap_frame, axis = 0).astype(np.float32)
return swap_frame


def normalize_swap_frame(self, swap_frame):
model_type = 'inswapper'
swap_frame = swap_frame.transpose(1, 2, 0)

if model_type == 'ghost':
swap_frame = (swap_frame * 127.5 + 127.5).round()
else:
swap_frame = (swap_frame * 255.0).round()
swap_frame = swap_frame[:, :, ::-1]
return swap_frame

def implode_pixel_boost(self, aligned_face_frame, model_size, pixel_boost_total : int):
subsample_frame = aligned_face_frame.reshape(model_size, pixel_boost_total, model_size, pixel_boost_total, 3)
subsample_frame = subsample_frame.transpose(1, 3, 0, 2, 4).reshape(pixel_boost_total ** 2, model_size, model_size, 3)
return subsample_frame


def explode_pixel_boost(self, subsample_frame, model_size, pixel_boost_total, pixel_boost_size):
final_frame = np.stack(subsample_frame, axis = 0).reshape(pixel_boost_total, pixel_boost_total, model_size, model_size, 3)
final_frame = final_frame.transpose(2, 0, 3, 1, 4).reshape(pixel_boost_size, pixel_boost_size, 3)
return final_frame

def process_mask(self, processor, frame:Frame, target:Frame):
img_mask = processor.Run(frame, self.options.masking_text)
img_mask = cv2.resize(img_mask, (target.shape[1], target.shape[0]))
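Editor's note: the subsample ("pixel boost") helpers above are easiest to verify in isolation. Below is a minimal sketch that replays implode_pixel_boost → prepare_crop_frame → normalize_swap_frame → explode_pixel_boost with an identity stand-in for the swap model; the 4x4 slice count and random image are illustrative only, not part of the commit.

```python
import numpy as np

model_size, total = 128, 4                      # 4x4 slices ~ a subsample_size of 512
size = model_size * total
aligned = np.random.randint(0, 256, (size, size, 3), dtype=np.uint8)

# implode_pixel_boost: slice k = j*total + l is the strided subsample aligned[j::total, l::total],
# so every slice is a full-face image at the model's native 128px resolution
tiles = aligned.reshape(model_size, total, model_size, total, 3)
tiles = tiles.transpose(1, 3, 0, 2, 4).reshape(total ** 2, model_size, model_size, 3)
assert np.array_equal(tiles[0], aligned[0::total, 0::total])

swapped = []
for tile in tiles:
    # prepare_crop_frame: BGR->RGB, scale to [0,1], HWC -> NCHW float32
    blob = np.expand_dims((tile[:, :, ::-1] / 255.0).transpose(2, 0, 1), 0).astype(np.float32)
    out = blob[0]                               # identity stand-in for p.Run(...)
    # normalize_swap_frame: CHW -> HWC, back to [0,255], RGB->BGR
    swapped.append((out.transpose(1, 2, 0) * 255.0).round()[:, :, ::-1])

# explode_pixel_boost: the exact inverse interleave, back to full resolution
final = np.stack(swapped, axis=0).reshape(total, total, model_size, model_size, 3)
final = final.transpose(2, 0, 3, 1, 4).reshape(size, size, 3)
assert np.array_equal(final, aligned)           # lossless round trip with a no-op swap
```

With a real swapper in place of the identity step, each 128px slice is swapped at full model quality and the reassembled face keeps the subsample resolution, which is what lets the swap exceed 128px without an enhancer pass.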
5 changes: 3 additions & 2 deletions roop/ProcessOptions.py
@@ -1,6 +1,6 @@
class ProcessOptions:

-def __init__(self, processordefines:dict, face_distance, blend_ratio, swap_mode, selected_index, masking_text, imagemask, num_steps, show_face_area, show_mask=False):
+def __init__(self, processordefines:dict, face_distance, blend_ratio, swap_mode, selected_index, masking_text, imagemask, num_steps, subsample_size, show_face_area, show_mask=False):
self.processors = processordefines
self.face_distance_threshold = face_distance
self.blend_ratio = blend_ratio
@@ -10,4 +10,5 @@ def __init__(self, processordefines:dict, face_distance, blend_ratio, swap_mode
self.imagemask = imagemask
self.num_swap_steps = num_steps
self.show_face_area_overlay = show_face_area
-self.show_face_masking = show_mask
+self.show_face_masking = show_mask
+self.subsample_size = subsample_size
4 changes: 3 additions & 1 deletion roop/core.py
@@ -214,7 +214,9 @@ def batch_process_regular(files:list[ProcessEntry], masking_engine:str, new_clip
mask = imagemask["layers"][0] if imagemask is not None else None
if len(roop.globals.INPUT_FACESETS) <= selected_index:
selected_index = 0
-options = ProcessOptions(get_processing_plugins(masking_engine), roop.globals.distance_threshold, roop.globals.blend_ratio, roop.globals.face_swap_mode, selected_index, new_clip_text, mask, num_swap_steps, False)
+options = ProcessOptions(get_processing_plugins(masking_engine), roop.globals.distance_threshold, roop.globals.blend_ratio,
+roop.globals.face_swap_mode, selected_index, new_clip_text, mask, num_swap_steps,
+roop.globals.subsample_size, False)
process_mgr.initialize(roop.globals.INPUT_FACESETS, roop.globals.TARGET_FACES, options)
batch_process(files, use_new_method)
return
11 changes: 7 additions & 4 deletions roop/face_util.py
@@ -210,15 +210,18 @@ def rotate_image_180(image):
)


-def estimate_norm(lmk, image_size=112, mode="arcface"):
+def estimate_norm(lmk, image_size=112):
assert lmk.shape == (5, 2)
assert image_size % 112 == 0 or image_size % 128 == 0
if image_size % 112 == 0:
ratio = float(image_size) / 112.0
diff_x = 0
-else:
+elif image_size % 128 == 0:
ratio = float(image_size) / 128.0
diff_x = 8.0 * ratio
+elif image_size % 512 == 0:
+ratio = float(image_size) / 512.0
+diff_x = 32.0 * ratio

dst = arcface_dst * ratio
dst[:, 0] += diff_x
tform = trans.SimilarityTransform()
@@ -230,7 +233,7 @@ def estimate_norm(lmk, image_size=112, mode="arcface"):

# aligned, M = norm_crop2(f[1], face.kps, 512)
def align_crop(img, landmark, image_size=112, mode="arcface"):
-M = estimate_norm(landmark, image_size, mode)
+M = estimate_norm(landmark, image_size)
warped = cv2.warpAffine(img, M, (image_size, image_size), borderValue=0.0)
return warped, M

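Editor's note: a self-contained sketch of the alignment path above — estimate_norm fits a least-squares similarity transform from the detected 5-point landmarks onto a scaled ArcFace template, and align_crop warps the frame with it. The landmark coordinates and the blank stand-in frame are made up for illustration; arcface_dst holds insightface's canonical 112px template points.

```python
import cv2
import numpy as np
from skimage import transform as trans

# insightface's canonical 5-point ArcFace template for a 112x112 crop
arcface_dst = np.array([[38.2946, 51.6963], [73.5318, 51.5014], [56.0252, 71.7366],
                        [41.5493, 92.3655], [70.7299, 92.2041]], dtype=np.float32)

# hypothetical detected landmarks (eyes, nose tip, mouth corners) in frame pixels
lmk = np.array([[460, 300], [540, 295], [500, 350], [470, 400], [535, 398]], dtype=np.float32)

image_size = 512            # a 512px subsample takes the %128 branch (512 % 128 == 0),
ratio = image_size / 128.0  # which also leaves the later %512 branch unreachable as ordered
dst = arcface_dst * ratio
dst[:, 0] += 8.0 * ratio    # diff_x: horizontal shift of the 128-based template

tform = trans.SimilarityTransform()
tform.estimate(lmk, dst)                 # least-squares fit of rotation/scale/translation
M = tform.params[0:2, :]                 # 2x3 matrix, as estimate_norm returns it

frame = np.zeros((720, 1280, 3), dtype=np.uint8)   # stand-in video frame
aligned = cv2.warpAffine(frame, M, (image_size, image_size), borderValue=0.0)
M_inv = cv2.invertAffineTransform(M)     # later used to paste the swapped face back
```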
1 change: 1 addition & 0 deletions roop/globals.py
@@ -26,6 +26,7 @@
headless = None
log_level = 'error'
selected_enhancer = None
subsample_size = 128
face_swap_mode = None
blend_ratio = 0.5
distance_threshold = 0.65
2 changes: 1 addition & 1 deletion roop/metadata.py
@@ -1,2 +1,2 @@
name = 'roop unleashed'
-version = '4.0.0'
+version = '4.1.0'
14 changes: 3 additions & 11 deletions roop/processors/FaceSwapInsightFace.py
@@ -36,26 +36,18 @@ def Initialize(self, plugin_options:dict):
self.model_swap_insightface = onnxruntime.InferenceSession(model_path, sess_options, providers=roop.globals.execution_providers)



def Run(self, source_face: Face, target_face: Face, temp_frame: Frame) -> Frame:
-blob = cv2.dnn.blobFromImage(temp_frame, 1.0 / self.input_std, (128, 128),
-(self.input_mean, self.input_mean, self.input_mean), swapRB=True)
latent = source_face.normed_embedding.reshape((1,-1))
latent = np.dot(latent, self.emap)
latent /= np.linalg.norm(latent)
io_binding = self.model_swap_insightface.io_binding()
-io_binding.bind_cpu_input("target", blob)
+io_binding.bind_cpu_input("target", temp_frame)
io_binding.bind_cpu_input("source", latent)
io_binding.bind_output("output", self.devicename)
self.model_swap_insightface.run_with_iobinding(io_binding)
ort_outs = io_binding.copy_outputs_to_cpu()[0]
-img_fake = ort_outs.transpose((0,2,3,1))[0]
-return np.clip(255 * img_fake, 0, 255).astype(np.uint8)[:,:,::-1]
-
-img_fake, M = self.model_swap_insightface.get(temp_frame, target_face, source_face, paste_back=False)
-# target_face.matrix = M
-# return img_fake
+return ort_outs[0]


def Release(self):
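Editor's note: after this change Run() no longer builds a blob itself — it binds the tensor produced by ProcessMgr.prepare_crop_frame directly and hands the raw NCHW output back for normalize_swap_frame to decode. A hedged sketch of that io_binding flow; the model path and provider here are illustrative assumptions:

```python
import numpy as np
import onnxruntime

session = onnxruntime.InferenceSession("models/inswapper_128.onnx",
                                       providers=["CPUExecutionProvider"])

blob = np.zeros((1, 3, 128, 128), dtype=np.float32)   # output of prepare_crop_frame
latent = np.zeros((1, 512), dtype=np.float32)         # emap-projected, L2-normalized embedding

io_binding = session.io_binding()
io_binding.bind_cpu_input("target", blob)             # pre-normalized slice, no blobFromImage
io_binding.bind_cpu_input("source", latent)
io_binding.bind_output("output", "cpu")
session.run_with_iobinding(io_binding)
swapped = io_binding.copy_outputs_to_cpu()[0][0]      # CHW float32, denormalized by the caller
```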
3 changes: 2 additions & 1 deletion roop/virtualcam.py
@@ -44,10 +44,11 @@ def virtualcamera(streamobs, cam_num,width,height):
print(f'Using {cam.native_fmt}')
else:
print(f'Not streaming to virtual camera!')
subsample_size = roop.globals.subsample_size

# always use xseg masking
options = ProcessOptions(get_processing_plugins("mask_xseg"), roop.globals.distance_threshold, roop.globals.blend_ratio,
"all", 0, None, None, 1, False)
"all", 0, None, None, 1, subsample_size, False)
while cam_active:
ret, frame = cap.read()
if not ret:
1 change: 1 addition & 0 deletions ui/globals.py
@@ -4,6 +4,7 @@
ui_SELECTED_INPUT_FACE_INDEX = 0

ui_selected_enhancer = None
ui_upscale = None
ui_blend_ratio = None
ui_input_thumbs = []
ui_target_thumbs = []
2 changes: 1 addition & 1 deletion ui/main.py
@@ -57,7 +57,7 @@ def run():
if server_port <= 0:
server_port = None
ssl_verify = False if server_name == '0.0.0.0' else True
-with gr.Blocks(title=f'{roop.metadata.name} {roop.metadata.version}', theme=roop.globals.CFG.selected_theme, css=mycss) as ui:
+with gr.Blocks(title=f'{roop.metadata.name} {roop.metadata.version}', theme=roop.globals.CFG.selected_theme, css=mycss, delete_cache=(60, 86400)) as ui:
with gr.Row(variant='compact'):
gr.Markdown(f"### [{roop.metadata.name} {roop.metadata.version}](https://github.com/C0untFloyd/roop-unleashed)")
gr.HTML(util.create_version_html(), elem_id="versions")
2 changes: 1 addition & 1 deletion ui/tabs/extras_tab.py
@@ -175,7 +175,7 @@ def on_frame_process(files, filterselection, upscaleselection):
filter = next((x for x in frame_upscalers_map.keys() if x == upscaleselection), None)
if filter is not None:
processoroptions.update(frame_upscalers_map[filter])
-options = ProcessOptions(processoroptions, 0, 0, "all", 0, None, None, None, False)
+options = ProcessOptions(processoroptions, 0, 0, "all", 0, None, None, 0, 128, False)
batch_process_with_options(list_files_process, options, None)
outdir = pathlib.Path(roop.globals.output_path)
outfiles = [str(item) for item in outdir.rglob("*") if item.is_file()]
19 changes: 12 additions & 7 deletions ui/tabs/faceswap_tab.py
@@ -99,13 +99,15 @@ def faceswap_tab():
with gr.Column(scale=1):
selected_face_detection = gr.Dropdown(["First found", "All female", "All male", "All faces", "Selected face"], value="First found", label="Specify face selection for swapping")
with gr.Column(scale=1):
-num_swap_steps = gr.Slider(1, 5, value=1, step=1.0, label="Number of swapping steps", info="More steps may increase likeness")
with gr.Column(scale=2):
ui.globals.ui_selected_enhancer = gr.Dropdown(["None", "Codeformer", "DMDNet", "GFPGAN", "GPEN", "Restoreformer++"], value="None", label="Select post-processing")

with gr.Row(variant='panel'):
with gr.Column(scale=1):
max_face_distance = gr.Slider(0.01, 1.0, value=0.65, label="Max Face Similarity Threshold", info="0.0 = identical 1.0 = no similarity")
with gr.Column(scale=1):
+num_swap_steps = gr.Slider(1, 5, value=1, step=1.0, label="Number of swapping steps", info="More steps can increase likeness")
+ui.globals.ui_upscale = gr.Dropdown(["128px", "256px", "512px"], value="128px", label="Subsample upscale to", interactive=True)
with gr.Column(scale=2):
ui.globals.ui_blend_ratio = gr.Slider(0.0, 1.0, value=0.65, label="Original/Enhanced image blend ratio", info="Only used with active post-processing")

@@ -140,7 +142,7 @@ def faceswap_tab():
resultvideo = gr.Video(label='Final Video', interactive=False, visible=False)

previewinputs = [preview_frame_num, bt_destfiles, fake_preview, ui.globals.ui_selected_enhancer, selected_face_detection,
max_face_distance, ui.globals.ui_blend_ratio, selected_mask_engine, clip_text, no_face_action, vr_mode, autorotate, maskimage, chk_showmaskoffsets, num_swap_steps]
max_face_distance, ui.globals.ui_blend_ratio, selected_mask_engine, clip_text, no_face_action, vr_mode, autorotate, maskimage, chk_showmaskoffsets, num_swap_steps, ui.globals.ui_upscale]
previewoutputs = [previewimage, maskimage, preview_frame_num]
input_faces.select(on_select_input_face, None, None).then(fn=on_preview_frame_changed, inputs=previewinputs, outputs=previewoutputs)
bt_remove_selected_input_face.click(fn=remove_selected_input_face, outputs=[input_faces])
@@ -176,7 +178,7 @@

start_event = bt_start.click(fn=start_swap,
inputs=[ui.globals.ui_selected_enhancer, selected_face_detection, roop.globals.keep_frames, roop.globals.wait_after_extraction,
-roop.globals.skip_audio, max_face_distance, ui.globals.ui_blend_ratio, selected_mask_engine, clip_text,video_swapping_method, no_face_action, vr_mode, autorotate, num_swap_steps, maskimage],
+roop.globals.skip_audio, max_face_distance, ui.globals.ui_blend_ratio, selected_mask_engine, clip_text,video_swapping_method, no_face_action, vr_mode, autorotate, num_swap_steps, ui.globals.ui_upscale, maskimage],
outputs=[bt_start, bt_stop, resultfiles], show_progress='full')
after_swap_event = start_event.then(fn=on_resultfiles_finished, inputs=[resultfiles], outputs=[resultimage, resultvideo])

@@ -407,7 +409,7 @@ def on_end_face_selection():


def on_preview_frame_changed(frame_num, files, fake_preview, enhancer, detection, face_distance, blend_ratio,
-selected_mask_engine, clip_text, no_face_action, vr_mode, auto_rotate, maskimage, show_face_area, num_steps):
+selected_mask_engine, clip_text, no_face_action, vr_mode, auto_rotate, maskimage, show_face_area, num_steps, upsample):
global SELECTED_INPUT_FACE_INDEX, manual_masking, current_video_fps

from roop.core import live_swap, get_processing_plugins
@@ -454,6 +456,8 @@ def on_preview_frame_changed(frame_num, files, fake_preview, enhancer, detection
roop.globals.no_face_action = index_of_no_face_action(no_face_action)
roop.globals.vr_mode = vr_mode
roop.globals.autorotate_faces = auto_rotate
roop.globals.subsample_size = int(upsample[:3])


mask_engine = map_mask_engine(selected_mask_engine, clip_text)

@@ -464,7 +468,7 @@
face_index = 0

options = ProcessOptions(get_processing_plugins(mask_engine), roop.globals.distance_threshold, roop.globals.blend_ratio,
-roop.globals.face_swap_mode, face_index, clip_text, maskimage, num_steps, show_face_area)
+roop.globals.face_swap_mode, face_index, clip_text, maskimage, num_steps, roop.globals.subsample_size, show_face_area)

current_frame = live_swap(current_frame, options)
if current_frame is None:
@@ -538,7 +542,7 @@ def on_preview_mask(frame_num, files, clip_text, mask_engine):
elif mask_engine == "DFL XSeg":
mask_engine = "mask_xseg"
options = ProcessOptions(get_processing_plugins(mask_engine), roop.globals.distance_threshold, roop.globals.blend_ratio,
"all", 0, clip_text, None, 0, False, True)
"all", 0, clip_text, None, 0, 128, False, True)

current_frame = live_swap(current_frame, options)
return util.convert_to_gradio(current_frame)
@@ -576,7 +580,7 @@ def translate_swap_mode(dropdown_text):


def start_swap( enhancer, detection, keep_frames, wait_after_extraction, skip_audio, face_distance, blend_ratio,
-selected_mask_engine, clip_text, processing_method, no_face_action, vr_mode, autorotate, num_swap_steps, imagemask, progress=gr.Progress()):
+selected_mask_engine, clip_text, processing_method, no_face_action, vr_mode, autorotate, num_swap_steps, upsample, imagemask, progress=gr.Progress()):
from ui.main import prepare_environment
from roop.core import batch_process_regular
global is_processing, list_files_process
@@ -604,6 +608,7 @@ def start_swap( enhancer, detection, keep_frames, wait_after_extraction, skip_au
roop.globals.no_face_action = index_of_no_face_action(no_face_action)
roop.globals.vr_mode = vr_mode
roop.globals.autorotate_faces = autorotate
roop.globals.subsample_size = int(upsample[:3])
mask_engine = map_mask_engine(selected_mask_engine, clip_text)

if roop.globals.face_swap_mode == 'selected':