-
-
Notifications
You must be signed in to change notification settings - Fork 3
/
inference_mrfa.py
131 lines (106 loc) · 4.33 KB
/
inference_mrfa.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
import numpy as np
import torch
import tqdm
import yaml
from comfy.utils import ProgressBar
from scipy.spatial import ConvexHull
from .module_mrfa.model import MRFA
from .module_mrfa.util import AntiAliasInterpolation2d
def mrfa_inference(
    source_image: torch.Tensor,
    driving_video: torch.Tensor,
    config_path: str,
    checkpoint_path: str,
    use_relative=True,
    relative_movement=True,
    relative_jacobian=True,
    adapt_movement_scale=False,
):
    """Animate ``source_image`` with the motion of ``driving_video`` using MRFA.

    The model is built from the YAML config, its weights are loaded from the
    checkpoint, and one output frame is generated per driving frame.

    Args:
        source_image: Source image tensor (4D). Presumably (1, C, H, W); the
            output handling below squeezes axis 0, so a batch of one is
            required.
        driving_video: Driving frames, indexed along dimension 2
            (``driving_video[:, :, t]`` is frame ``t``).
        config_path: Path to the YAML model configuration. It must provide
            ``train_params.prior_model``.
        checkpoint_path: Path to a checkpoint whose ``"model"`` entry holds
            the state dict.
        use_relative: If true, key points are detected per frame and
            normalised against the first driving frame via ``normalize_kp``;
            otherwise the full model is run on each (source, frame) pair.
        relative_movement: Forwarded to ``normalize_kp`` as
            ``use_relative_movement``.
        relative_jacobian: Forwarded to ``normalize_kp`` as
            ``use_relative_jacobian``.
        adapt_movement_scale: Forwarded to ``normalize_kp``.

    Returns:
        A list with one NumPy array per driving frame, each transposed from
        channel-first to channel-last with the batch axis removed.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # NOTE(security): yaml.full_load can build more than plain data types.
    # Only pass config files from a trusted location; consider
    # yaml.safe_load if the configs are plain mappings.
    with open(config_path) as f:
        config = yaml.full_load(f)

    model = MRFA(config)
    # The model is wrapped in DataParallel only while loading the weights and
    # unwrapped right after. Presumably the checkpoint was saved from a
    # DataParallel model ("module."-prefixed keys) - TODO confirm.
    model = torch.nn.DataParallel(model)
    # NOTE(security): torch.load unpickles the file; only load trusted
    # checkpoints.
    checkpoint = torch.load(checkpoint_path, map_location=device)
    # strict=False: missing or unexpected keys are ignored silently.
    model.load_state_dict(checkpoint["model"], strict=False)
    model = model.module
    model.eval().to(device)

    kp_detector = model.encoder
    dense_motion_network = model.dense_motion
    decoder = model.decoder
    down = AntiAliasInterpolation2d(3, 0.25).to(device)

    source = source_image.to(device)
    driving = driving_video.to(device)
    num_frames = driving.shape[2]

    pbar = ProgressBar(num_frames)
    predictions = []

    with torch.no_grad():
        if use_relative:
            # Everything below depends only on the source image / first
            # driving frame, so compute it once instead of once per frame.
            is_tpsm = config["train_params"]["prior_model"] == "tpsm"
            kp_source = kp_detector(source)
            kp_driving_initial = kp_detector(driving[:, :, 0])
            source_down = down(source)
            if is_tpsm:
                # Key points are grouped in sets of 5 and averaged per group.
                kp_s_value = (
                    kp_source["kp"].view(source.shape[0], -1, 5, 2).mean(2)
                )
            else:
                kp_s_value = kp_source["kp"]

        for frame_idx in tqdm.tqdm(
            range(num_frames),
            desc=f"Generating {'Relative' if use_relative else 'Absolute'} mode",
        ):
            driving_frame = driving[:, :, frame_idx]

            if use_relative:
                kp_driving = kp_detector(driving_frame)
                kp_norm = normalize_kp(
                    kp_source=kp_source,
                    kp_driving=kp_driving,
                    kp_driving_initial=kp_driving_initial,
                    use_relative_movement=relative_movement,
                    use_relative_jacobian=relative_jacobian,
                    adapt_movement_scale=adapt_movement_scale,
                )
                dense_motion = dense_motion_network(
                    source, kp_norm, kp_source, bg_param=None
                )
                if is_tpsm:
                    kp_d_value = (
                        kp_norm["kp"].view(driving.shape[0], -1, 5, 2).mean(2)
                    )
                else:
                    kp_d_value = kp_norm["kp"]
                out, _, _ = decoder(
                    kp_s_value,
                    kp_d_value,
                    dense_motion,
                    img=source_down,
                    img_full=source,
                )
            else:
                abs_inp = {"source": source, "driving": driving_frame}
                out, _, _, _, _ = model(abs_inp, is_train=False)

            # Channel-first -> channel-last, then drop the batch axis.
            prediction = out.detach().cpu().numpy()
            prediction = np.transpose(prediction, [0, 2, 3, 1]).squeeze(0)
            predictions.append(prediction)

            # frame_idx is zero-based; report the number of frames *done* so
            # the bar reaches num_frames after the last frame.
            pbar.update_absolute(frame_idx + 1, num_frames)

    return predictions
def normalize_kp(
    kp_source,
    kp_driving,
    kp_driving_initial,
    adapt_movement_scale=False,
    use_relative_movement=False,
    use_relative_jacobian=False,
):
    """Transfer driving key-point motion onto the source key points.

    Args:
        kp_source: Mapping with a ``"kp"`` tensor (and optionally a
            ``"jacobian"`` tensor) for the source image.
        kp_driving: Same structure, for the current driving frame.
        kp_driving_initial: Same structure, for the first driving frame.
        adapt_movement_scale: If true, scale the displacement by
            ``sqrt(source hull area) / sqrt(initial driving hull area)``,
            computed from the first batch element's key points.
        use_relative_movement: If true, replace the driving key points with
            ``source + scale * (driving - driving_initial)``.
        use_relative_jacobian: If true (and relative movement is on), also
            transfer the jacobians as
            ``driving @ inverse(driving_initial) @ source``. Skipped when any
            of the three mappings has no ``"jacobian"`` entry.

    Returns:
        A new dict: a shallow copy of ``kp_driving`` with ``"kp"`` (and
        possibly ``"jacobian"``) replaced. The inputs are not modified.
    """
    if adapt_movement_scale:
        # For 2-D points, ConvexHull.volume is the enclosed area.
        source_area = ConvexHull(
            kp_source["kp"][0].detach().cpu().numpy()
        ).volume
        driving_area = ConvexHull(
            kp_driving_initial["kp"][0].detach().cpu().numpy()
        ).volume
        # Kept in a separate local so the boolean flag is not overwritten.
        movement_scale = float(np.sqrt(source_area) / np.sqrt(driving_area))
    else:
        movement_scale = 1

    kp_new = dict(kp_driving)
    if not use_relative_movement:
        return kp_new

    kp_value_diff = (kp_driving["kp"] - kp_driving_initial["kp"]) * movement_scale
    kp_new["kp"] = kp_value_diff + kp_source["kp"]

    if use_relative_jacobian:
        # Some key-point detectors emit only "kp"; in that case there is
        # nothing to transfer, so skip instead of raising KeyError.
        jacobians = [
            kp.get("jacobian")
            for kp in (kp_driving, kp_driving_initial, kp_source)
        ]
        if all(j is not None for j in jacobians):
            driving_jac, initial_jac, source_jac = jacobians
            jacobian_diff = torch.matmul(driving_jac, torch.inverse(initial_jac))
            kp_new["jacobian"] = torch.matmul(jacobian_diff, source_jac)

    return kp_new