From c74ac4aed8eeb6cb7587e4ef8d01024c1cb09b46 Mon Sep 17 00:00:00 2001
From: Dev Aggarwal <devxpy@gmail.com>
Date: Wed, 7 Aug 2024 03:10:21 +0530
Subject: [PATCH] fix BrokenPipeError caused by image dimensions not divisible
 by 2

Handle ffmpeg errors properly and ensure even image dimensions before processing
---
 chart/model-values.yaml |  2 +-
 ffmpeg_util.py          | 30 ++++++++++++++++++++++++++++--
 retro/gfpgan.py         |  4 +++-
 retro/sadtalker.py      |  6 ++++--
 retro/wav2lip.py        |  6 +++---
 5 files changed, 39 insertions(+), 9 deletions(-)

diff --git a/chart/model-values.yaml b/chart/model-values.yaml
index 13bc085..d933e45 100644
--- a/chart/model-values.yaml
+++ b/chart/model-values.yaml
@@ -304,7 +304,7 @@ deployments:
         thenlper/gte-base
 
   - name: "retro-sadtalker"
-    image: "crgooeyprodwestus1.azurecr.io/gooey-gpu-retro:10"
+    image: "crgooeyprodwestus1.azurecr.io/gooey-gpu-retro:12"
     autoscaling:
       queueLength: 2
       minReplicaCount: 3
diff --git a/ffmpeg_util.py b/ffmpeg_util.py
index 7540672..e63ac28 100644
--- a/ffmpeg_util.py
+++ b/ffmpeg_util.py
@@ -107,9 +107,15 @@ def ffmpeg_read_input_frames(
         "pipe:1",
     ]  # fmt:skip
     print("\t$ " + " ".join(cmd_args))
-    ffproc = subprocess.Popen(cmd_args, stdout=subprocess.PIPE)
+    ffproc = subprocess.Popen(cmd_args, stderr=subprocess.PIPE, stdout=subprocess.PIPE)
 
     while True:
+        retcode = ffproc.poll()
+        if retcode is not None:
+            output = ffproc.stderr.read().decode()
+            err = subprocess.SubprocessError(output)
+            err.__cause__ = subprocess.CalledProcessError(retcode, ffproc.args, output)
+            raise UserError(FFMPEG_ERR_MSG) from err
         im_bytes = ffproc.stdout.read(height * width * 3)
         if not im_bytes:
             break
@@ -138,7 +144,27 @@ def ffmpeg_get_writer_proc(
         output_path,
     ]  # fmt:skip
     print("\t$ " + " ".join(cmd_args))
-    return subprocess.Popen(cmd_args, stdin=subprocess.PIPE)
+    return subprocess.Popen(
+        cmd_args,
+        stdin=subprocess.PIPE,
+        stdout=subprocess.PIPE,
+        stderr=subprocess.STDOUT,
+    )
+
+
+def ensure_img_even_dimensions(img: np.ndarray) -> np.ndarray:
+    """Crop img so its first two dimensions are divisible by 2 (drops at most one trailing row/column), for ffmpeg libx264."""
+    return img[: img.shape[0] // 2 * 2, : img.shape[1] // 2 * 2]
+
+
+def ffmpeg_write_output_frame(ffproc: subprocess.Popen, img: np.ndarray):  # write one frame to ffproc's stdin
+    retcode = ffproc.poll()  # None while the process is still running
+    if retcode is not None:  # process exited before this frame could be written
+        output = ffproc.stdout.read().decode()
+        err = subprocess.SubprocessError(output)  # puts the captured output in the exception chain
+        err.__cause__ = subprocess.CalledProcessError(retcode, ffproc.args, output)
+        raise UserError(FFMPEG_ERR_MSG) from err
+    ffproc.stdin.write(img.tobytes())  # tobytes() replaces the deprecated tostring() alias (removed in NumPy 2.0)
 
 
 def ffmpeg(*args) -> str:
diff --git a/retro/gfpgan.py b/retro/gfpgan.py
index 63f4425..879d5dc 100644
--- a/retro/gfpgan.py
+++ b/retro/gfpgan.py
@@ -13,6 +13,7 @@
 
 import gooey_gpu
 from celeryconfig import app, setup_queues
+from ffmpeg_util import ensure_img_even_dimensions
 
 MAX_RES = 1920 * 1080
 
@@ -140,6 +141,7 @@ def run_enhancer(
                 response.output.codec_name = "png"
                 break
 
+            restored_img = ensure_img_even_dimensions(restored_img)
             if ffproc is None:
                 response.output.width = restored_img.shape[1]
                 response.output.height = restored_img.shape[0]
@@ -151,7 +153,7 @@ def run_enhancer(
                     output_path=output_path,
                     audio_path=input_path,
                 )
-            ffproc.stdin.write(restored_img.tostring())
+            gooey_gpu.ffmpeg_write_output_frame(ffproc, restored_img)
             response.output.num_frames += 1
 
         if ffproc is not None:
diff --git a/retro/sadtalker.py b/retro/sadtalker.py
index ced0729..e725bdd 100644
--- a/retro/sadtalker.py
+++ b/retro/sadtalker.py
@@ -15,6 +15,7 @@
 
 import gooey_gpu
 from celeryconfig import app, setup_queues
+from ffmpeg_util import ensure_img_even_dimensions
 
 sadtalker_lib_path = os.path.join(os.path.dirname(__file__), "SadTalker")
 sys.path.append(sadtalker_lib_path)
@@ -338,7 +339,7 @@ def animate_from_coeff_generate(
     video_name = x["video_name"] + ".mp4"
     return_path = str(os.path.join(video_save_dir, video_name))
 
-    img_size = int(img_size) // 2 * 2
+    img_size = int(img_size) // 2 * 2  # make sure it's divisible by 2 for ffmpeg libx264
     original_size = crop_info[0]
     if original_size:
         frame_w, frame_h = (
@@ -413,6 +414,7 @@ def animate_from_coeff_generate(
                         "Please use the crop mode and make sure the input face is clear or try a different aspect ratio. "
                         "Humanoid faces and solid backgrounds work best."
                     ) from e
+                out_image = ensure_img_even_dimensions(out_image)
 
             if ffproc is None:
                 out_meta.width = out_image.shape[1]
@@ -424,7 +426,7 @@ def animate_from_coeff_generate(
                     fps=out_meta.fps,
                     audio_path=x["audio_path"],
                 )
-            ffproc.stdin.write(out_image.tostring())
+            gooey_gpu.ffmpeg_write_output_frame(ffproc, out_image)
             out_meta.num_frames += 1
 
     ffproc.stdin.close()
diff --git a/retro/wav2lip.py b/retro/wav2lip.py
index 3872ec4..60afa49 100644
--- a/retro/wav2lip.py
+++ b/retro/wav2lip.py
@@ -182,7 +182,7 @@ def main(model, detector, outfile: str, inputs: Wav2LipInputs):
                 )
             )
 
-        if idx == 0:
+        if ffproc is None:
             frame_h, frame_w = frame_batch[0].shape[:-1]
             gooey_gpu.ffmpeg(
                 # "-thread_queue_size", "128",
@@ -238,7 +238,7 @@ def main(model, detector, outfile: str, inputs: Wav2LipInputs):
 
             f[y1:y2, x1:x2] = p
             cv2.imwrite(f"{outfile}_{idx}.png", f)
-            ffproc.stdin.write(f.tostring())
+            gooey_gpu.ffmpeg_write_output_frame(ffproc, f)
 
     if input_stream:
         input_stream.release()
@@ -271,7 +271,7 @@ def resize_frame(frame, out_height: int) -> np.ndarray:
         )
     aspect_ratio = frame.shape[1] / frame.shape[0]
     out_width = int(out_height * aspect_ratio)
-    if out_width % 2 != 0:
+    if out_width % 2 != 0:  # make sure it's divisible by 2 for ffmpeg libx264
         out_width -= 1
     frame = cv2.resize(frame, (out_width, out_height))
     return frame