controlnet_union_test_scribble.py
import cv2
import torch
import random
import numpy as np
from PIL import Image
from diffusers import AutoencoderKL, EulerAncestralDiscreteScheduler
from controlnet_aux import HEDdetector
from models.controlnet_union import ControlNetModel_Union
from pipeline.pipeline_controlnet_union_sd_xl import StableDiffusionXLControlNetUnionPipeline
def nms(x, t, s):
    """Thin a soft edge map into binary sketch lines via directional non-maximum suppression."""
    x = cv2.GaussianBlur(x.astype(np.float32), (0, 0), s)
    # Four directional kernels: horizontal, vertical, and the two diagonals.
    f1 = np.array([[0, 0, 0], [1, 1, 1], [0, 0, 0]], dtype=np.uint8)
    f2 = np.array([[0, 1, 0], [0, 1, 0], [0, 1, 0]], dtype=np.uint8)
    f3 = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]], dtype=np.uint8)
    f4 = np.array([[0, 0, 1], [0, 1, 0], [1, 0, 0]], dtype=np.uint8)
    y = np.zeros_like(x)
    # Keep a pixel only where it is the local maximum along at least one direction.
    for f in [f1, f2, f3, f4]:
        np.putmask(y, cv2.dilate(x, kernel=f) == x, x)
    # Binarize: surviving pixels above threshold t become 255, everything else 0.
    z = np.zeros_like(y, dtype=np.uint8)
    z[y > t] = 255
    return z
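# A quick, optional sanity check for nms (not part of the original script; the
# array shape and values below are purely illustrative):
#   edge_map = np.zeros((64, 64), dtype=np.uint8)
#   edge_map[32, :] = 200                  # one bright horizontal line
#   thinned = nms(edge_map, 127, 3.0)      # binary result, values in {0, 255}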
device = torch.device('cuda:0')

eulera_scheduler = EulerAncestralDiscreteScheduler.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", subfolder="scheduler")

# When testing with another base model, remember to change the VAE as well.
vae = AutoencoderKL.from_pretrained("madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16)

controlnet_model = ControlNetModel_Union.from_pretrained("xinsir/controlnet-union-sdxl-1.0", torch_dtype=torch.float16, use_safetensors=True)

pipe = StableDiffusionXLControlNetUnionPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    controlnet=controlnet_model,
    vae=vae,
    torch_dtype=torch.float16,
    scheduler=eulera_scheduler,
)
pipe = pipe.to(device)

processor = HEDdetector.from_pretrained('lllyasviel/Annotators')
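# If the models do not fit in VRAM, diffusers' model CPU offload is an
# alternative to moving the whole pipeline with pipe.to(device) (an optional
# tweak, not used by the original script):
# pipe.enable_model_cpu_offload()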
prompt = "your prompt, the longer the better, you can describe it in as much detail as possible"
negative_prompt = 'longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality'
# You can either use HED to generate a fake scribble from an image, or use a sketch image you drew entirely yourself.
if random.random() > 0.5:
    # Method 1
    # If you use HED, provide an image (real or anime), extract its HED lines, and use them as the scribble.
    # For details on HED detection, see https://github.com/lllyasviel/ControlNet/blob/main/gradio_fake_scribble2image.py
    # Below is an example using the diffusers HED detector.
    image = Image.open("your image path, the image can be real or anime, HED detector will extract its edge boundaries")
    controlnet_img = processor(image, scribble=False)
    controlnet_img.save("a hed detect path for an image")

    # The following processing simulates a human sketch; different thresholds produce different line widths.
    controlnet_img = np.array(controlnet_img)
    controlnet_img = nms(controlnet_img, 127, 3)
    controlnet_img = cv2.GaussianBlur(controlnet_img, (0, 0), 3)

    # A higher threshold gives thinner lines.
    random_val = int(round(random.uniform(0.01, 0.10), 2) * 255)
    controlnet_img[controlnet_img > random_val] = 255
    controlnet_img[controlnet_img < 255] = 0
    controlnet_img = Image.fromarray(controlnet_img)
else:
    # Method 2
    # Use a sketch image you drew entirely yourself with some tool, such as a drawing board.
    control_path = "path to the sketch image you drew with some tool, like a drawing board"
    controlnet_img = Image.open(control_path)  # Note that the image must be black-and-white (0 or 255), like the examples we list.

# You must resize to 1024*1024 or to the same resolution bucket to get the best performance.
width, height = controlnet_img.size
ratio = np.sqrt(1024. * 1024. / (width * height))
new_width, new_height = int(width * ratio), int(height * ratio)
controlnet_img = controlnet_img.resize((new_width, new_height))
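# Note: SDXL generally expects dimensions divisible by 8, and multiples of 64
# match its training buckets best. If the pipeline rejects the resized
# resolution, rounding like this is a possible fix (an assumption, not part of
# the original script):
# new_width, new_height = new_width // 64 * 64, new_height // 64 * 64
# controlnet_img = controlnet_img.resize((new_width, new_height))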
seed = random.randint(0, 2147483647)
generator = torch.Generator('cuda').manual_seed(seed)
# 0 -- openpose
# 1 -- depth
# 2 -- hed/pidi/scribble/ted
# 3 -- canny/lineart/anime_lineart/mlsd
# 4 -- normal
# 5 -- segment
images = pipe(
    prompt=[prompt]*1,
    image_list=[0, 0, controlnet_img, 0, 0, 0],
    negative_prompt=[negative_prompt]*1,
    generator=generator,
    width=new_width,
    height=new_height,
    num_inference_steps=30,
    union_control=True,
    union_control_type=torch.Tensor([0, 0, 1, 0, 0, 0]),
).images
images[0].save("your image save path, png format usually gives better image quality than jpg or webp, but produces much larger files")
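# To condition on a different modality with the same union checkpoint, place
# the control image in the matching slot and set the matching flag. A hedged
# sketch for canny (slot 3), assuming canny_img is preprocessed and resized the
# same way as controlnet_img above; all other pipeline arguments stay the same:
#   image_list=[0, 0, 0, canny_img, 0, 0],
#   union_control_type=torch.Tensor([0, 0, 0, 1, 0, 0]),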