From c74ac4aed8eeb6cb7587e4ef8d01024c1cb09b46 Mon Sep 17 00:00:00 2001 From: Dev Aggarwal Date: Wed, 7 Aug 2024 03:10:21 +0530 Subject: [PATCH] fix BrokenPipeError caused by image dimensions not divisible by 2 Handle ffmpeg errors properly and ensure even image dimensions before processing --- chart/model-values.yaml | 2 +- ffmpeg_util.py | 30 ++++++++++++++++++++++++++++-- retro/gfpgan.py | 4 +++- retro/sadtalker.py | 6 ++++-- retro/wav2lip.py | 6 +++--- 5 files changed, 39 insertions(+), 9 deletions(-) diff --git a/chart/model-values.yaml b/chart/model-values.yaml index 13bc085..d933e45 100644 --- a/chart/model-values.yaml +++ b/chart/model-values.yaml @@ -304,7 +304,7 @@ deployments: thenlper/gte-base - name: "retro-sadtalker" - image: "crgooeyprodwestus1.azurecr.io/gooey-gpu-retro:10" + image: "crgooeyprodwestus1.azurecr.io/gooey-gpu-retro:12" autoscaling: queueLength: 2 minReplicaCount: 3 diff --git a/ffmpeg_util.py b/ffmpeg_util.py index 7540672..e63ac28 100644 --- a/ffmpeg_util.py +++ b/ffmpeg_util.py @@ -107,9 +107,15 @@ def ffmpeg_read_input_frames( "pipe:1", ] # fmt:skip print("\t$ " + " ".join(cmd_args)) - ffproc = subprocess.Popen(cmd_args, stdout=subprocess.PIPE) + ffproc = subprocess.Popen(cmd_args, stderr=subprocess.PIPE, stdout=subprocess.PIPE) while True: + retcode = ffproc.poll() + if retcode is not None: + output = ffproc.stderr.read().decode() + err = subprocess.SubprocessError(output) + err.__cause__ = subprocess.CalledProcessError(retcode, ffproc.args, output) + raise UserError(FFMPEG_ERR_MSG) from err im_bytes = ffproc.stdout.read(height * width * 3) if not im_bytes: break @@ -138,7 +144,27 @@ def ffmpeg_get_writer_proc( output_path, ] # fmt:skip print("\t$ " + " ".join(cmd_args)) - return subprocess.Popen(cmd_args, stdin=subprocess.PIPE) + return subprocess.Popen( + cmd_args, + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + ) + + +def ensure_img_even_dimensions(img: np.ndarray) -> np.ndarray: + """make sure 
the image dimensions are divisible by 2 for ffmpeg libx264""" + return img[: img.shape[0] // 2 * 2, : img.shape[1] // 2 * 2] + + +def ffmpeg_write_output_frame(ffproc: subprocess.Popen, img: np.ndarray): + retcode = ffproc.poll() + if retcode is not None: + output = ffproc.stdout.read().decode() + err = subprocess.SubprocessError(output) + err.__cause__ = subprocess.CalledProcessError(retcode, ffproc.args, output) + raise UserError(FFMPEG_ERR_MSG) from err + ffproc.stdin.write(img.tostring()) def ffmpeg(*args) -> str: diff --git a/retro/gfpgan.py b/retro/gfpgan.py index 63f4425..879d5dc 100644 --- a/retro/gfpgan.py +++ b/retro/gfpgan.py @@ -13,6 +13,7 @@ import gooey_gpu from celeryconfig import app, setup_queues +from ffmpeg_util import ensure_img_even_dimensions MAX_RES = 1920 * 1080 @@ -140,6 +141,7 @@ def run_enhancer( response.output.codec_name = "png" break + restored_img = ensure_img_even_dimensions(restored_img) if ffproc is None: response.output.width = restored_img.shape[1] response.output.height = restored_img.shape[0] @@ -151,7 +153,7 @@ def run_enhancer( output_path=output_path, audio_path=input_path, ) - ffproc.stdin.write(restored_img.tostring()) + gooey_gpu.ffmpeg_write_output_frame(ffproc, restored_img) response.output.num_frames += 1 if ffproc is not None: diff --git a/retro/sadtalker.py b/retro/sadtalker.py index ced0729..e725bdd 100644 --- a/retro/sadtalker.py +++ b/retro/sadtalker.py @@ -15,6 +15,7 @@ import gooey_gpu from celeryconfig import app, setup_queues +from ffmpeg_util import ensure_img_even_dimensions sadtalker_lib_path = os.path.join(os.path.dirname(__file__), "SadTalker") sys.path.append(sadtalker_lib_path) @@ -338,7 +339,7 @@ def animate_from_coeff_generate( video_name = x["video_name"] + ".mp4" return_path = str(os.path.join(video_save_dir, video_name)) - img_size = int(img_size) // 2 * 2 + img_size = int(img_size) // 2 * 2 # make sure it's divisible by 2 for ffmpeg libx264 original_size = crop_info[0] if original_size: frame_w,
frame_h = ( @@ -413,6 +414,7 @@ def animate_from_coeff_generate( "Please use the crop mode and make sure the input face is clear or try a different aspect ratio. " "Humanoid faces and solid backgrounds work best." ) from e + out_image = ensure_img_even_dimensions(out_image) if ffproc is None: out_meta.width = out_image.shape[1] @@ -424,7 +426,7 @@ def animate_from_coeff_generate( fps=out_meta.fps, audio_path=x["audio_path"], ) - ffproc.stdin.write(out_image.tostring()) + gooey_gpu.ffmpeg_write_output_frame(ffproc, out_image) out_meta.num_frames += 1 ffproc.stdin.close() diff --git a/retro/wav2lip.py b/retro/wav2lip.py index 3872ec4..60afa49 100644 --- a/retro/wav2lip.py +++ b/retro/wav2lip.py @@ -182,7 +182,7 @@ def main(model, detector, outfile: str, inputs: Wav2LipInputs): ) ) - if idx == 0: + if ffproc is None: frame_h, frame_w = frame_batch[0].shape[:-1] gooey_gpu.ffmpeg( # "-thread_queue_size", "128", @@ -238,7 +238,7 @@ def main(model, detector, outfile: str, inputs: Wav2LipInputs): f[y1:y2, x1:x2] = p cv2.imwrite(f"{outfile}_{idx}.png", f) - ffproc.stdin.write(f.tostring()) + gooey_gpu.ffmpeg_write_output_frame(ffproc, f) if input_stream: input_stream.release() @@ -271,7 +271,7 @@ def resize_frame(frame, out_height: int) -> np.ndarray: ) aspect_ratio = frame.shape[1] / frame.shape[0] out_width = int(out_height * aspect_ratio) - if out_width % 2 != 0: + if out_width % 2 != 0: # make sure it's divisible by 2 for ffmpeg libx264 out_width -= 1 frame = cv2.resize(frame, (out_width, out_height)) return frame