-
Notifications
You must be signed in to change notification settings - Fork 1
/
tools.py
79 lines (62 loc) · 2.26 KB
/
tools.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
import cv2
import numpy as np
import torch
from einops import rearrange
from midas import MiDaSInference
model = None
def HWC3(x):
assert x.dtype == np.uint8
if x.ndim == 2:
x = x[:, :, None]
assert x.ndim == 3
H, W, C = x.shape
assert C == 1 or C == 3 or C == 4
if C == 3:
return x
if C == 1:
return np.concatenate([x, x, x], axis=2)
if C == 4:
color = x[:, :, 0:3].astype(np.float32)
alpha = x[:, :, 3:4].astype(np.float32) / 255.0
y = color * alpha + 255.0 * (1.0 - alpha)
y = y.clip(0, 255).astype(np.uint8)
return y
def apply_midas(input_image):
global model
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
if model is None:
model = MiDaSInference(model_type="dpt_large", device=device)
assert input_image.ndim == 3
image_depth = input_image
with torch.no_grad():
image_depth = torch.from_numpy(image_depth).float().to(device)
image_depth = image_depth / 127.5 - 1.0
image_depth = rearrange(image_depth, 'h w c -> 1 c h w')
depth = model(image_depth)[0]
depth_pt = depth.clone()
depth_pt -= torch.min(depth_pt)
depth_pt /= torch.max(depth_pt)
depth_pt = depth_pt.cpu().numpy()
depth_image = (depth_pt * 255.0).clip(0, 255).astype(np.uint8)
return depth_image
def pad64(x):
return int(np.ceil(float(x) / 64.0) * 64 - x)
def safer_memory(x):
# Fix many MAC/AMD problems
return np.ascontiguousarray(x.copy()).copy()
def resize_image_with_pad(input_image, resolution, skip_hwc3=False):
if skip_hwc3:
img = input_image
else:
img = HWC3(input_image)
H_raw, W_raw, _ = img.shape
k = float(resolution) / float(min(H_raw, W_raw))
interpolation = cv2.INTER_CUBIC if k > 1 else cv2.INTER_AREA
H_target = int(np.round(float(H_raw) * k))
W_target = int(np.round(float(W_raw) * k))
img = cv2.resize(img, (W_target, H_target), interpolation=interpolation)
H_pad, W_pad = pad64(H_target), pad64(W_target)
img_padded = np.pad(img, [[0, H_pad], [0, W_pad], [0, 0]], mode='edge')
def remove_pad(x):
return safer_memory(x[:H_target, :W_target])
return safer_memory(img_padded), remove_pad