-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathvideo_generator.py
202 lines (166 loc) · 9.26 KB
/
video_generator.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
import sys
import os
import json
import cv2
from moviepy.editor import VideoFileClip, AudioFileClip
import argparse
import numpy as np
from lipsync_jeff import LipSync
from play_video import VideoPlayer
from PyQt5 import QtWidgets
duration = 95
fps = 60
"""
This script generates a lip-sync video by combining viseme images (mouth shapes) with corresponding audio, creating a synchronized video. It uses viseme data from a JSON file to determine the timing of each viseme image and overlays the audio on the generated video. The `VideoMaker` class is the core component that handles video generation and audio synchronization. The script supports multiple modes (e.g., "beff-mode", "Hulk-mode") and allows for dynamic video creation based on the mode selected.
### Key Components:
1. **`VideoMaker` Class**:
- This class handles the video generation from viseme images and metadata, and then combines the video with audio to create a final synchronized lip-sync video.
- The class reads viseme metadata (timing and ID) from a JSON file and maps these visemes to corresponding images stored in a directory.
- The video is generated frame-by-frame based on the viseme timings, and the audio is added afterward.
2. **LipSync Mode**:
- The script allows for different "modes" such as "beff-mode" and "Hulk-mode", which use the `LipSync` class to generate a video with different characteristics.
- Depending on the mode selected, the script can either use predefined lip-sync behavior or generate a regular video using the provided viseme metadata and images.
3. **Audio and Video Synchronization**:
- After the video is generated from viseme images, the script uses `moviepy` to merge the corresponding audio.
- If the audio is longer than the video, it is clipped to match the video duration, ensuring synchronization between the audio and video.
### Key Functions in the `VideoMaker` Class:
1. **`__init__(self, images_dir, visemes_dir, audio_dir, out_dir, fps, map_file, callback, mode)`**:
- Initializes the class with directories for viseme images, metadata, audio files, output video, and other configurations such as FPS (frames per second) and mode.
2. **`generate_video(self, in_file)`**:
- Generates a video from viseme images and metadata stored in JSON files.
- It reads viseme timings from the JSON file and creates the video by displaying the corresponding viseme images for each time interval.
3. **`add_audio(self, audio_file, video_file)`**:
- Adds an audio track to the generated video using `moviepy`.
- The audio is synchronized with the video, and the script clips either the audio or video to ensure they match in duration.
4. **`make_frame(self, id)`**:
- Loads and processes the viseme image corresponding to the given ID. This image is resized and rotated before being added to the video.
### How to Use:
1. **Set Up the Required Libraries**:
Ensure you have the required Python libraries installed:
```bash
pip install opencv-python moviepy numpy PyQt5
"""
class VideoMaker:
def __init__(self, images_dir, visemes_dir, audio_dir, out_dir, fps, map_file, callback, mode):
self.fourcc = cv2.VideoWriter_fourcc(*"mp4v")
self.height, self.width = self.get_im_dims(images_dir)
self.im_dir = images_dir
self.metadata_dir = visemes_dir
self.audio_dir = audio_dir
self.out_dir = out_dir
self.fps = 60
self.duration = 0
self.callback = callback
self.mode = mode
print("Init VideoMaker")
def load_json(self, file):
with open(file, "r") as opened_file:
return json.load(opened_file)
def get_im_dims(self, im_dir): # should first check the file is an image
for image in os.listdir(im_dir):
try:
frame = cv2.imread(os.path.join(im_dir, image))
height, width, channels = frame.shape
return height, width
except Exception as e:
print(f"Error: {e}")
continue
def get_out(self, out_path):
return cv2.VideoWriter(out_path, self.fourcc, self.fps, (self.width, self.height))
def read_chunk_data(self, chunk):
return chunk["id"], chunk["offset"]
def make_frame(self, id):
print(f"Generating frame for viseme id {id}.")
frame = cv2.imread(os.path.join(self.im_dir, f"viseme-id-{id}.jpg"))
frame = cv2.rotate(frame, cv2.ROTATE_90_COUNTERCLOCKWISE)
return cv2.resize(frame, (self.width, self.height))
def frame_to_video(self, output, frame, dur):
for i in range(int(np.round(dur / 1000 * self.fps, 0))):
output.write(frame)
def generate_video(self, in_file):
if(self.mode == "beff-mode"):
print("\n Beff Mode \n")
lipSync = LipSync("beff")
lipSync.generateVideo()
return
elif(self.mode == "Hulk-mode"):
print("\n Hulk Mode \n")
lipSync = LipSync("hulk")
lipSync.generateVideo()
return
in_path = os.path.join(self.metadata_dir, in_file)
self.out_path = os.path.join(self.out_dir, f'{in_file.strip(".json")}_{self.fps}.mp4')
print(f"Generating video from {self.out_path}.")
output = self.get_out(self.out_path)
data = self.load_json("metadata/text_to_viseme.json")
print(len(data))
total_time = 0
viseme_dur = 0
for chunk in data:
mapped, dur = self.read_chunk_data(chunk)
dur = dur - total_time
print(f"Viseme id {mapped} has duration {dur} milliseconds.")
total_time += dur
frame = self.make_frame(mapped)
self.frame_to_video(output, frame, dur)
viseme_dur += dur
output.release()
cv2.destroyAllWindows()
print(f"Generated video of {viseme_dur} milliseconds from viseme images.")
def add_audio(self, audio_file, video_file):
video_clip = VideoFileClip(video_file)
audio_clip = AudioFileClip(audio_file)
print("Audio File: " + audio_file)
print(f"Adding audio stream of {audio_clip.end} milliseconds.")
if video_clip.end < audio_clip.end:
audio_clip = audio_clip.subclip(0, video_clip.end)
print(f"Clipped audio file to {video_clip.end} milliseconds.")
elif audio_clip.end < video_clip.end:
video_clip = video_clip.subclip(0, audio_clip.end)
print(f"Clipped video file to {audio_clip.end} milliseconds.")
final_clip = video_clip.set_audio(audio_clip)
final_clip.write_videofile(f'video/temp.mp4', codec="libx264", audio_codec="aac")
final_video = video_clip.set_audio(audio_clip)
print(f"Successfully generated video of {final_video.end} milliseconds from video and audio streams.")
video_out_path = f'video/2{os.path.basename(self.im_dir)}_with_audio_{video_file.strip(".json").strip("video/")}'
# final_video.write_videofile(video_out_path, fps=self.fps)
final_video.write_videofile(video_out_path, fps=self.fps, audio_codec="aac")
print(f"Video successfully saved to {video_out_path}.")
self.callback()
def main():
parser = argparse.ArgumentParser(
description="Specify metadata, audio, image and output directories, and viseme mapping file."
)
parser.add_argument("--im_dir", type=str, default="image/mouth", help="Directory with viseme images.")
parser.add_argument(
"--metadata_dir", type=str, default="metadata", help="Directory containing viseme metadata .json files."
)
parser.add_argument("--audio_dir", type=str, default="audio", help="Directory containing .wav audio files.")
parser.add_argument("--out_dir", type=str, default="video", help="Directory to save generated video.")
parser.add_argument("--fps", type=int, default=60, help="Frame rate (in frames per second) to generate video.")
parser.add_argument("--map", type=str, default="map/viseme_map.json", help="Path to viseme mapping file.")
parser.add_argument("--no_audio", action="store_true", help="Generated video without audio.")
args = parser.parse_args()
viseme_video_maker = VideoMaker(args.im_dir, args.metadata_dir, args.audio_dir, args.out_dir, args.fps, args.map, None)
for in_file in os.listdir(args.metadata_dir):
if ".json" not in in_file:
continue
else:
viseme_video_maker.generate_video(in_file)
print(f"Generated video from {in_file}.")
if (viseme_video_maker.mode != "beff-mode"):
if args.no_audio is not True:
viseme_video_maker.add_audio(f'audio/text_to_audio.wav', viseme_video_maker.out_path)
# combine_audio_video(audio_file_path_new, video_file_path_new, output_file_path_new)
def combine_audio_video(audio_file_path, video_file_path, output_file_path):
audio_clip = AudioFileClip(audio_file_path)
video_clip = VideoFileClip(video_file_path)
final_clip = video_clip.set_audio(audio_clip)
final_clip.write_videofile(output_file_path, codec="libx264", audio_codec="aac")
print("Done")
# Update these paths according to your files' locations
audio_file_path_new = '/audio/text_to_audio.wav'
video_file_path_new = '/video/video.mp4'
output_file_path_new = '/video/output_video.mp4'
if __name__ == "__main__":
main()