Spaces: Running on T4

Commit 9d36776 · Parent(s): d9c3123

update

Files changed:
- .gitignore +2 -1
- app.py +7 -5
- app_texnet.py +13 -10
- model.py +94 -4
- preprocessor.py +32 -0
- settings.py +5 -3
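In short: this commit repurposes the hysts ControlNet-v1-1 demo for material authoring. app.py swaps the Canny tab for Texnet/Matnet tabs backed by a new "texnet" task; app_texnet.py drops the Canny threshold sliders and adds a gr.Examples entry; model.py registers the jingyangcarl/texnet ControlNet checkpoint and adds a coarse-to-fine process_texnet pipeline that refines ControlNet output with a Stable Diffusion img2img pass; preprocessor.py adds a passthrough TexnetPreprocessor; settings.py raises the resolution limits; and .gitignore now ignores the local data directory.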
.gitignore
CHANGED

@@ -1 +1,2 @@
-__pycache__
+__pycache__
+data
app.py
CHANGED

@@ -4,6 +4,7 @@ import gradio as gr
 import torch
 
 from app_canny import create_demo as create_demo_canny
+from app_texnet import create_demo as create_demo_texnet
 
 from model import Model
 from settings import ALLOW_CHANGING_BASE_MODEL, DEFAULT_MODEL_ID, SHOW_DUPLICATE_BUTTON
@@ -13,7 +14,8 @@ DESCRIPTION = "# Material Authoring Demo v0.1. Under Construction"
 if not torch.cuda.is_available():
     DESCRIPTION += "\n<p>Running on CPU 🥶 This demo does not work on CPU.</p>"
 
-model = Model(base_model_id=DEFAULT_MODEL_ID, task_name="Canny")
+# model = Model(base_model_id=DEFAULT_MODEL_ID, task_name="Canny")
+model = Model(base_model_id=DEFAULT_MODEL_ID, task_name="texnet")
 
 with gr.Blocks() as demo:
     gr.Markdown(DESCRIPTION)
@@ -24,12 +26,12 @@ with gr.Blocks() as demo:
     )
 
     with gr.Tabs():
-        with gr.Tab("Canny"):
-            create_demo_canny(model.process_canny)
+        # with gr.Tab("Canny"):
+        #     create_demo_canny(model.process_canny)
         with gr.Tab("Texnet"):
-
+            create_demo_texnet(model.process_texnet)
         with gr.Tab("Matnet"):
-
+            create_demo_texnet(model.process_texnet)
 
     with gr.Accordion(label="Base model", open=False):
         with gr.Row():
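Note: both the Texnet and Matnet tabs are wired to the same handler, create_demo_texnet(model.process_texnet); a Matnet-specific demo presumably comes later, consistent with the "Under Construction" description.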
app_texnet.py
CHANGED

@@ -29,12 +29,6 @@ def create_demo(process):
                     value=DEFAULT_IMAGE_RESOLUTION,
                     step=256,
                 )
-                canny_low_threshold = gr.Slider(
-                    label="Canny low threshold", minimum=1, maximum=255, value=100, step=1
-                )
-                canny_high_threshold = gr.Slider(
-                    label="Canny high threshold", minimum=1, maximum=255, value=200, step=1
-                )
                 num_steps = gr.Slider(label="Number of steps", minimum=1, maximum=100, value=20, step=1)
                 guidance_scale = gr.Slider(label="Guidance scale", minimum=0.1, maximum=30.0, value=9.0, step=0.1)
                 seed = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=0)
@@ -56,8 +50,6 @@ def create_demo(process):
            num_steps,
            guidance_scale,
            seed,
-           canny_low_threshold,
-           canny_high_threshold,
        ]
        prompt.submit(
            fn=randomize_seed_fn,
@@ -72,12 +64,23 @@ def create_demo(process):
            api_name="canny",
            concurrency_id="main",
        )
+
+        # TODO: setup several example images
+        gr.Examples(
+            fn=process,
+            inputs=inputs,
+            outputs=result,
+            examples=[
+                # ["/dgxusers/Users/jyang/project/ObjectReal/data/control/preprocess/bunny/fused/uv_normal.png", "feather"],
+                ["./data/uv_normal.png", "feather"],
+            ],
+        )
     return demo
 
 
 if __name__ == "__main__":
     from model import Model
 
-    model = Model(task_name="
-    demo = create_demo(model.
+    model = Model(task_name="Texnet")
+    demo = create_demo(model.process_texnet)
     demo.queue().launch()
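Note: the new gr.Examples block points at ./data/uv_normal.png, which lives under the data directory that this same commit adds to .gitignore, so the example image has to be provisioned on the Space separately rather than shipped in the repo. The commented-out /dgxusers/... entry is the developer's local original of the same file. The two removed lines in the __main__ block were cut off in the source and are left truncated here.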
model.py
CHANGED

@@ -1,24 +1,29 @@
 import gc
 
+# get socket and check if the name is vgldgx01
+import socket
+if socket.gethostname() != "vgldgx01":
+    import spaces  # [uncomment to use ZeroGPU]
+
 import numpy as np
 import PIL.Image
 import torch
 from controlnet_aux.util import HWC3
-import spaces  # [uncomment to use ZeroGPU]
 from diffusers import (
     ControlNetModel,
     DiffusionPipeline,
     StableDiffusionControlNetPipeline,
+    StableDiffusionImg2ImgPipeline,
     UniPCMultistepScheduler,
 )
-
+from torchvision import transforms
 from cv_utils import resize_image
 from preprocessor import Preprocessor
 from settings import MAX_IMAGE_RESOLUTION, MAX_NUM_IMAGES
 
 CONTROLNET_MODEL_IDS = {
     # "Openpose": "lllyasviel/control_v11p_sd15_openpose",
-    "Canny": "lllyasviel/control_v11p_sd15_canny",
+    # "Canny": "lllyasviel/control_v11p_sd15_canny",
     # "MLSD": "lllyasviel/control_v11p_sd15_mlsd",
     # "scribble": "lllyasviel/control_v11p_sd15_scribble",
     # "softedge": "lllyasviel/control_v11p_sd15_softedge",
@@ -30,6 +35,8 @@ CONTROLNET_MODEL_IDS = {
     # "shuffle": "lllyasviel/control_v11e_sd15_shuffle",
     # "ip2p": "lllyasviel/control_v11e_sd15_ip2p",
     # "inpaint": "lllyasviel/control_v11e_sd15_inpaint",
+    # "texnet": "/home/jyang/projects/ObjectReal/logs/train_texnet_deploy/checkpoint-55000/controlnet",  # load and call
+    "texnet": "jingyangcarl/texnet",
 }
 
 
@@ -46,6 +53,9 @@ class Model:
         self.base_model_id = ""
         self.task_name = ""
         self.pipe = self.load_pipe(base_model_id, task_name)
+        self.pipe_base = StableDiffusionImg2ImgPipeline.from_pretrained(
+            "runwayml/stable-diffusion-v1-5", safety_checker=None, torch_dtype=torch.float16
+        ).to(self.device)
         self.preprocessor = Preprocessor()
 
     def load_pipe(self, base_model_id: str, task_name: str) -> DiffusionPipeline:
@@ -58,6 +68,14 @@ class Model:
             return self.pipe
         model_id = CONTROLNET_MODEL_IDS[task_name]
         controlnet = ControlNetModel.from_pretrained(model_id, torch_dtype=torch.float16)
+        to_upload = False
+        if to_upload:
+            # confirm before uploading
+            confirm = input(f"Do you want to upload {model_id} to the hub? (y/n): ")
+            if confirm.lower() == "y":
+                controlnet.push_to_hub("jingyangcarl/texnet")
+            else:
+                print("Upload cancelled.")
         pipe = StableDiffusionControlNetPipeline.from_pretrained(
             base_model_id, safety_checker=None, controlnet=controlnet, torch_dtype=torch.float16
         )
@@ -132,7 +150,79 @@ class Model:
             image=control_image,
         ).images
 
-    @spaces.GPU  # [uncomment to use ZeroGPU]
+    # @spaces.GPU  # [uncomment to use ZeroGPU]
+    @torch.inference_mode()
+    def process_texnet(
+        self,
+        image: np.ndarray,
+        prompt: str,
+        additional_prompt: str,
+        negative_prompt: str,
+        num_images: int,
+        image_resolution: int,
+        num_steps: int,
+        guidance_scale: float,
+        seed: int,
+        low_threshold: int,
+        high_threshold: int,
+    ) -> list[PIL.Image.Image]:
+        if image is None:
+            raise ValueError
+        if image_resolution > MAX_IMAGE_RESOLUTION:
+            raise ValueError
+        if num_images > MAX_NUM_IMAGES:
+            raise ValueError
+
+        self.preprocessor.load("texnet")
+        control_image = self.preprocessor(
+            image=image, low_threshold=low_threshold, high_threshold=high_threshold, image_resolution=image_resolution, output_type="pil"
+        )
+
+        self.load_controlnet_weight("texnet")
+        results_coarse = self.run_pipe(
+            prompt=self.get_prompt(prompt, additional_prompt),
+            negative_prompt=negative_prompt,
+            control_image=control_image,
+            num_images=num_images,
+            num_steps=num_steps,
+            guidance_scale=guidance_scale,
+            seed=seed,
+        )
+
+        # use img2img pipeline
+        self.pipe_backup = self.pipe
+        self.pipe = self.pipe_base
+
+        # refine
+        results_fine = []
+        for result_coarse in results_coarse:
+            # clean up GPU cache
+            torch.cuda.empty_cache()
+            gc.collect()
+
+            # masking
+            mask = (np.array(control_image).sum(axis=-1) == 0)[..., None]
+            image_masked = PIL.Image.fromarray(np.where(mask, control_image, result_coarse))
+            image_blurry = transforms.GaussianBlur(kernel_size=5, sigma=1)(image_masked)
+            result_fine = self.run_pipe(
+                # prompt=prompt,
+                prompt=self.get_prompt(prompt, additional_prompt),
+                negative_prompt=negative_prompt,
+                control_image=image_blurry,
+                num_images=1,
+                num_steps=num_steps,
+                guidance_scale=guidance_scale,
+                seed=seed,
+            )[0]
+            result_fine = PIL.Image.fromarray(np.where(mask, control_image, result_fine))
+            results_fine.append(result_fine)
+
+        # restore the original pipe
+        self.pipe = self.pipe_backup
+
+        return [control_image, *results_fine, control_image, *results_coarse]
+
+    # @spaces.GPU  # [uncomment to use ZeroGPU]
     @torch.inference_mode()
     def process_canny(
         self,
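Two things worth noting in process_texnet: the hostname guard at the top of the file skips the ZeroGPU dependency on the developer's vgldgx01 machine, and the method still declares low_threshold/high_threshold parameters even though app_texnet.py no longer includes threshold sliders in its inputs list, so as wired the Gradio callback appears to be invoked with fewer arguments than the signature requires. For clarity, here is a minimal, self-contained sketch of the masking-and-blur step that precedes the img2img refinement pass; the toy images are hypothetical stand-ins for the demo's control map and coarse ControlNet output:

import numpy as np
import PIL.Image
from torchvision import transforms

# Hypothetical stand-ins: a control map that is black outside the UV island,
# and a flat gray "coarse" result from the ControlNet pass.
control_image = PIL.Image.fromarray(np.zeros((64, 64, 3), dtype=np.uint8))
result_coarse = PIL.Image.fromarray(np.full((64, 64, 3), 200, dtype=np.uint8))

# Background mask: pixels where the control map is pure black.
mask = (np.array(control_image).sum(axis=-1) == 0)[..., None]

# Composite: keep the control map in the background, the coarse result elsewhere.
image_masked = PIL.Image.fromarray(
    np.where(mask, np.array(control_image), np.array(result_coarse))
)

# Slightly blur the composite before handing it to the img2img refinement pass.
image_blurry = transforms.GaussianBlur(kernel_size=5, sigma=1)(image_masked)
print(image_blurry.size)  # (64, 64)

The effect is that pixels outside the unwrapped texture region are copied through unchanged, so only the textured area is refined.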
preprocessor.py
CHANGED

@@ -60,6 +60,8 @@ class Preprocessor:
             self.model = DepthEstimator()
         elif name == "UPerNet":
             self.model = ImageSegmentor()
+        elif name == "texnet":
+            self.model = TexnetPreprocessor()
         else:
             raise ValueError
         torch.cuda.empty_cache()
@@ -86,3 +88,33 @@ class Preprocessor:
             image = resize_image(image, resolution=image_resolution)
             return PIL.Image.fromarray(image)
         return self.model(image, **kwargs)
+
+
+# https://github.com/huggingface/controlnet_aux/blob/master/src/controlnet_aux/canny/__init__.py
+class TexnetPreprocessor:
+    def __call__(self, input_image=None, low_threshold=100, high_threshold=200, image_resolution=512, output_type=None, **kwargs):
+        if "img" in kwargs:
+            warnings.warn("img is deprecated, please use `input_image=...` instead.", DeprecationWarning)
+            input_image = kwargs.pop("img")
+
+        if input_image is None:
+            raise ValueError("input_image must be defined.")
+
+        if not isinstance(input_image, np.ndarray):
+            input_image = np.array(input_image, dtype=np.uint8)
+            output_type = output_type or "pil"
+        else:
+            output_type = output_type or "np"
+
+        input_image = HWC3(input_image)
+        input_image = resize_image(input_image, image_resolution)
+        H, W, C = input_image.shape
+
+        # detected_map = cv2.resize(detected_map, (W, H), interpolation=cv2.INTER_LINEAR)
+        output_image = input_image.copy()
+
+        if output_type == "pil":
+            # detected_map = Image.fromarray(detected_map)
+            output_image = PIL.Image.fromarray(output_image)
+
+        return output_image
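TexnetPreprocessor is effectively a passthrough: it normalizes the input to 3-channel uint8 (HWC3), resizes it, and returns a copy, leaving the Canny-style threshold arguments unused. A usage sketch against the demo's own modules (assuming preprocessor.py already imports warnings, HWC3, and resize_image at module level, which this diff does not add):

import numpy as np
from preprocessor import Preprocessor  # the demo's own module

preprocessor = Preprocessor()
preprocessor.load("texnet")  # selects the new TexnetPreprocessor

# Hypothetical 512x512 UV-normal map standing in for ./data/uv_normal.png.
uv_normal = np.random.randint(0, 256, (512, 512, 3), dtype=np.uint8)

# Mirrors the call in Model.process_texnet; the thresholds are accepted but ignored.
control_image = preprocessor(
    image=uv_normal, low_threshold=100, high_threshold=200,
    image_resolution=512, output_type="pil",
)
print(control_image.size)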
settings.py
CHANGED

@@ -5,15 +5,17 @@ import numpy as np
 DEFAULT_MODEL_ID = os.getenv("DEFAULT_MODEL_ID", "stable-diffusion-v1-5/stable-diffusion-v1-5")
 
 MAX_NUM_IMAGES = int(os.getenv("MAX_NUM_IMAGES", "3"))
-DEFAULT_NUM_IMAGES = min(MAX_NUM_IMAGES, int(os.getenv("DEFAULT_NUM_IMAGES", "
-MAX_IMAGE_RESOLUTION = int(os.getenv("MAX_IMAGE_RESOLUTION", "
-DEFAULT_IMAGE_RESOLUTION = min(MAX_IMAGE_RESOLUTION, int(os.getenv("DEFAULT_IMAGE_RESOLUTION", "
+DEFAULT_NUM_IMAGES = min(MAX_NUM_IMAGES, int(os.getenv("DEFAULT_NUM_IMAGES", "2")))
+MAX_IMAGE_RESOLUTION = int(os.getenv("MAX_IMAGE_RESOLUTION", "2048"))
+DEFAULT_IMAGE_RESOLUTION = min(MAX_IMAGE_RESOLUTION, int(os.getenv("DEFAULT_IMAGE_RESOLUTION", "1024")))
 
 ALLOW_CHANGING_BASE_MODEL = os.getenv("SPACE_ID") != "hysts/ControlNet-v1-1"
 SHOW_DUPLICATE_BUTTON = os.getenv("SHOW_DUPLICATE_BUTTON") == "1"
 
 MAX_SEED = np.iinfo(np.int32).max
 
+# Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.
+
 # setup CUDA
 # disable the following when deployting to hugging face
 # if os.getenv("CUDA_VISIBLE_DEVICES") is None:
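The removed lines' old default values were cut off in the source and are left truncated above. The min() clamps keep env-var overrides from exceeding the hard limits; a small sketch of how the new defaults resolve under a hypothetical override:

import os

# Hypothetical override, e.g. set in the Space's environment settings.
os.environ["DEFAULT_IMAGE_RESOLUTION"] = "4096"

MAX_IMAGE_RESOLUTION = int(os.getenv("MAX_IMAGE_RESOLUTION", "2048"))
DEFAULT_IMAGE_RESOLUTION = min(MAX_IMAGE_RESOLUTION, int(os.getenv("DEFAULT_IMAGE_RESOLUTION", "1024")))

# The requested 4096 is clamped to the 2048 ceiling.
assert DEFAULT_IMAGE_RESOLUTION == 2048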