jingyangcarl committed on
Commit
9d36776
·
1 Parent(s): d9c3123
Files changed (6) hide show
  1. .gitignore +2 -1
  2. app.py +7 -5
  3. app_texnet.py +13 -10
  4. model.py +94 -4
  5. preprocessor.py +32 -0
  6. settings.py +5 -3
.gitignore CHANGED
@@ -1 +1,2 @@
1
- __pycache__
 
 
1
+ __pycache__
2
+ data
app.py CHANGED
@@ -4,6 +4,7 @@ import gradio as gr
4
  import torch
5
 
6
  from app_canny import create_demo as create_demo_canny
 
7
 
8
  from model import Model
9
  from settings import ALLOW_CHANGING_BASE_MODEL, DEFAULT_MODEL_ID, SHOW_DUPLICATE_BUTTON
@@ -13,7 +14,8 @@ DESCRIPTION = "# Material Authoring Demo v0.1. Under Construction"
13
  if not torch.cuda.is_available():
14
  DESCRIPTION += "\n<p>Running on CPU 🥶 This demo does not work on CPU.</p>"
15
 
16
- model = Model(base_model_id=DEFAULT_MODEL_ID, task_name="Canny")
 
17
 
18
  with gr.Blocks() as demo:
19
  gr.Markdown(DESCRIPTION)
@@ -24,12 +26,12 @@ with gr.Blocks() as demo:
24
  )
25
 
26
  with gr.Tabs():
27
- with gr.Tab("Canny"):
28
- create_demo_canny(model.process_canny)
29
  with gr.Tab("Texnet"):
30
- create_demo_canny(model.process_canny)
31
  with gr.Tab("Matnet"):
32
- create_demo_canny(model.process_canny)
33
 
34
  with gr.Accordion(label="Base model", open=False):
35
  with gr.Row():
 
4
  import torch
5
 
6
  from app_canny import create_demo as create_demo_canny
7
+ from app_texnet import create_demo as create_demo_texnet
8
 
9
  from model import Model
10
  from settings import ALLOW_CHANGING_BASE_MODEL, DEFAULT_MODEL_ID, SHOW_DUPLICATE_BUTTON
 
14
  if not torch.cuda.is_available():
15
  DESCRIPTION += "\n<p>Running on CPU 🥶 This demo does not work on CPU.</p>"
16
 
17
+ # model = Model(base_model_id=DEFAULT_MODEL_ID, task_name="Canny")
18
+ model = Model(base_model_id=DEFAULT_MODEL_ID, task_name="texnet")
19
 
20
  with gr.Blocks() as demo:
21
  gr.Markdown(DESCRIPTION)
 
26
  )
27
 
28
  with gr.Tabs():
29
+ # with gr.Tab("Canny"):
30
+ # create_demo_canny(model.process_canny)
31
  with gr.Tab("Texnet"):
32
+ create_demo_texnet(model.process_texnet)
33
  with gr.Tab("Matnet"):
34
+ create_demo_texnet(model.process_texnet)
35
 
36
  with gr.Accordion(label="Base model", open=False):
37
  with gr.Row():
app_texnet.py CHANGED
@@ -29,12 +29,6 @@ def create_demo(process):
29
  value=DEFAULT_IMAGE_RESOLUTION,
30
  step=256,
31
  )
32
- canny_low_threshold = gr.Slider(
33
- label="Canny low threshold", minimum=1, maximum=255, value=100, step=1
34
- )
35
- canny_high_threshold = gr.Slider(
36
- label="Canny high threshold", minimum=1, maximum=255, value=200, step=1
37
- )
38
  num_steps = gr.Slider(label="Number of steps", minimum=1, maximum=100, value=20, step=1)
39
  guidance_scale = gr.Slider(label="Guidance scale", minimum=0.1, maximum=30.0, value=9.0, step=0.1)
40
  seed = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=0)
@@ -56,8 +50,6 @@ def create_demo(process):
56
  num_steps,
57
  guidance_scale,
58
  seed,
59
- canny_low_threshold,
60
- canny_high_threshold,
61
  ]
62
  prompt.submit(
63
  fn=randomize_seed_fn,
@@ -72,12 +64,23 @@ def create_demo(process):
72
  api_name="canny",
73
  concurrency_id="main",
74
  )
 
 
 
 
 
 
 
 
 
 
 
75
  return demo
76
 
77
 
78
  if __name__ == "__main__":
79
  from model import Model
80
 
81
- model = Model(task_name="Canny")
82
- demo = create_demo(model.process_canny)
83
  demo.queue().launch()
 
29
  value=DEFAULT_IMAGE_RESOLUTION,
30
  step=256,
31
  )
 
 
 
 
 
 
32
  num_steps = gr.Slider(label="Number of steps", minimum=1, maximum=100, value=20, step=1)
33
  guidance_scale = gr.Slider(label="Guidance scale", minimum=0.1, maximum=30.0, value=9.0, step=0.1)
34
  seed = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=0)
 
50
  num_steps,
51
  guidance_scale,
52
  seed,
 
 
53
  ]
54
  prompt.submit(
55
  fn=randomize_seed_fn,
 
64
  api_name="canny",
65
  concurrency_id="main",
66
  )
67
+
68
+ # TODO: set up several example images
69
+ gr.Examples(
70
+ fn=process,
71
+ inputs=inputs,
72
+ outputs=result,
73
+ examples=[
74
+ # ["/dgxusers/Users/jyang/project/ObjectReal/data/control/preprocess/bunny/fused/uv_normal.png", "feather"],
75
+ ["./data/uv_normal.png", "feather"],
76
+ ],
77
+ )
78
  return demo
79
 
80
 
81
  if __name__ == "__main__":
82
  from model import Model
83
 
84
+ model = Model(task_name="Texnet")
85
+ demo = create_demo(model.process_texnet)
86
  demo.queue().launch()
model.py CHANGED
@@ -1,24 +1,29 @@
1
  import gc
2
 
 
 
 
 
 
3
  import numpy as np
4
  import PIL.Image
5
  import torch
6
  from controlnet_aux.util import HWC3
7
- import spaces #[uncomment to use ZeroGPU]
8
  from diffusers import (
9
  ControlNetModel,
10
  DiffusionPipeline,
11
  StableDiffusionControlNetPipeline,
 
12
  UniPCMultistepScheduler,
13
  )
14
-
15
  from cv_utils import resize_image
16
  from preprocessor import Preprocessor
17
  from settings import MAX_IMAGE_RESOLUTION, MAX_NUM_IMAGES
18
 
19
  CONTROLNET_MODEL_IDS = {
20
  # "Openpose": "lllyasviel/control_v11p_sd15_openpose",
21
- "Canny": "lllyasviel/control_v11p_sd15_canny",
22
  # "MLSD": "lllyasviel/control_v11p_sd15_mlsd",
23
  # "scribble": "lllyasviel/control_v11p_sd15_scribble",
24
  # "softedge": "lllyasviel/control_v11p_sd15_softedge",
@@ -30,6 +35,8 @@ CONTROLNET_MODEL_IDS = {
30
  # "shuffle": "lllyasviel/control_v11e_sd15_shuffle",
31
  # "ip2p": "lllyasviel/control_v11e_sd15_ip2p",
32
  # "inpaint": "lllyasviel/control_v11e_sd15_inpaint",
 
 
33
  }
34
 
35
 
@@ -46,6 +53,9 @@ class Model:
46
  self.base_model_id = ""
47
  self.task_name = ""
48
  self.pipe = self.load_pipe(base_model_id, task_name)
 
 
 
49
  self.preprocessor = Preprocessor()
50
 
51
  def load_pipe(self, base_model_id: str, task_name: str) -> DiffusionPipeline:
@@ -58,6 +68,14 @@ class Model:
58
  return self.pipe
59
  model_id = CONTROLNET_MODEL_IDS[task_name]
60
  controlnet = ControlNetModel.from_pretrained(model_id, torch_dtype=torch.float16)
 
 
 
 
 
 
 
 
61
  pipe = StableDiffusionControlNetPipeline.from_pretrained(
62
  base_model_id, safety_checker=None, controlnet=controlnet, torch_dtype=torch.float16
63
  )
@@ -132,7 +150,79 @@ class Model:
132
  image=control_image,
133
  ).images
134
 
135
- @spaces.GPU #[uncomment to use ZeroGPU]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
136
  @torch.inference_mode()
137
  def process_canny(
138
  self,
 
1
  import gc
2
 
3
+ # check the hostname; `spaces` is imported only when it is not vgldgx01
4
+ import socket
5
+ if socket.gethostname() != "vgldgx01":
6
+ import spaces #[uncomment to use ZeroGPU]
7
+
8
  import numpy as np
9
  import PIL.Image
10
  import torch
11
  from controlnet_aux.util import HWC3
 
12
  from diffusers import (
13
  ControlNetModel,
14
  DiffusionPipeline,
15
  StableDiffusionControlNetPipeline,
16
+ StableDiffusionImg2ImgPipeline,
17
  UniPCMultistepScheduler,
18
  )
19
+ from torchvision import transforms
20
  from cv_utils import resize_image
21
  from preprocessor import Preprocessor
22
  from settings import MAX_IMAGE_RESOLUTION, MAX_NUM_IMAGES
23
 
24
  CONTROLNET_MODEL_IDS = {
25
  # "Openpose": "lllyasviel/control_v11p_sd15_openpose",
26
+ # "Canny": "lllyasviel/control_v11p_sd15_canny",
27
  # "MLSD": "lllyasviel/control_v11p_sd15_mlsd",
28
  # "scribble": "lllyasviel/control_v11p_sd15_scribble",
29
  # "softedge": "lllyasviel/control_v11p_sd15_softedge",
 
35
  # "shuffle": "lllyasviel/control_v11e_sd15_shuffle",
36
  # "ip2p": "lllyasviel/control_v11e_sd15_ip2p",
37
  # "inpaint": "lllyasviel/control_v11e_sd15_inpaint",
38
+ # "texnet": "/home/jyang/projects/ObjectReal/logs/train_texnet_deploy/checkpoint-55000/controlnet" # load and call
39
+ "texnet": "jingyangcarl/texnet",
40
  }
41
 
42
 
 
53
  self.base_model_id = ""
54
  self.task_name = ""
55
  self.pipe = self.load_pipe(base_model_id, task_name)
56
+ self.pipe_base = StableDiffusionImg2ImgPipeline.from_pretrained(
57
+ 'runwayml/stable-diffusion-v1-5', safety_checker=None, torch_dtype=torch.float16
58
+ ).to(self.device)
59
  self.preprocessor = Preprocessor()
60
 
61
  def load_pipe(self, base_model_id: str, task_name: str) -> DiffusionPipeline:
 
68
  return self.pipe
69
  model_id = CONTROLNET_MODEL_IDS[task_name]
70
  controlnet = ControlNetModel.from_pretrained(model_id, torch_dtype=torch.float16)
71
+ to_upload = False
72
+ if to_upload:
73
+ # confirm before uploading
74
+ confirm = input(f"Do you want to upload {model_id} to the hub? (y/n): ")
75
+ if confirm.lower() == "y":
76
+ controlnet.push_to_hub("jingyangcarl/texnet")
77
+ else:
78
+ print("Upload cancelled.")
79
  pipe = StableDiffusionControlNetPipeline.from_pretrained(
80
  base_model_id, safety_checker=None, controlnet=controlnet, torch_dtype=torch.float16
81
  )
 
150
  image=control_image,
151
  ).images
152
 
153
+ # @spaces.GPU #[uncomment to use ZeroGPU]
154
+ @torch.inference_mode()
155
+ def process_texnet(
156
+ self,
157
+ image: np.ndarray,
158
+ prompt: str,
159
+ additional_prompt: str,
160
+ negative_prompt: str,
161
+ num_images: int,
162
+ image_resolution: int,
163
+ num_steps: int,
164
+ guidance_scale: float,
165
+ seed: int,
166
+ low_threshold: int,
167
+ high_threshold: int,
168
+ ) -> list[PIL.Image.Image]:
169
+ if image is None:
170
+ raise ValueError
171
+ if image_resolution > MAX_IMAGE_RESOLUTION:
172
+ raise ValueError
173
+ if num_images > MAX_NUM_IMAGES:
174
+ raise ValueError
175
+
176
+ self.preprocessor.load("texnet")
177
+ control_image = self.preprocessor(
178
+ image=image, low_threshold=low_threshold, high_threshold=high_threshold, image_resolution=image_resolution, output_type="pil"
179
+ )
180
+
181
+ self.load_controlnet_weight("texnet")
182
+ results_coarse = self.run_pipe(
183
+ prompt=self.get_prompt(prompt, additional_prompt),
184
+ negative_prompt=negative_prompt,
185
+ control_image=control_image,
186
+ num_images=num_images,
187
+ num_steps=num_steps,
188
+ guidance_scale=guidance_scale,
189
+ seed=seed,
190
+ )
191
+
192
+ # use img2img pipeline
193
+ self.pipe_backup = self.pipe
194
+ self.pipe = self.pipe_base
195
+
196
+ # refine
197
+ results_fine = []
198
+ for result_coarse in results_coarse:
199
+ # clean up GPU cache
200
+ torch.cuda.empty_cache()
201
+ gc.collect()
202
+
203
+ # masking
204
+ mask = (np.array(control_image).sum(axis=-1) == 0)[...,None]
205
+ image_masked = PIL.Image.fromarray(np.where(mask, control_image, result_coarse))
206
+ image_blurry = transforms.GaussianBlur(kernel_size=5, sigma=1)(image_masked)
207
+ result_fine = self.run_pipe(
208
+ # prompt=prompt,
209
+ prompt=self.get_prompt(prompt, additional_prompt),
210
+ negative_prompt=negative_prompt,
211
+ control_image=image_blurry,
212
+ num_images=1,
213
+ num_steps=num_steps,
214
+ guidance_scale=guidance_scale,
215
+ seed=seed,
216
+ )[0]
217
+ result_fine = PIL.Image.fromarray(np.where(mask, control_image, result_fine))
218
+ results_fine.append(result_fine)
219
+
220
+ # restore the original pipe
221
+ self.pipe = self.pipe_backup
222
+
223
+ return [control_image, *results_fine, control_image, *results_coarse,]
224
+
225
+ # @spaces.GPU #[uncomment to use ZeroGPU]
226
  @torch.inference_mode()
227
  def process_canny(
228
  self,
preprocessor.py CHANGED
@@ -60,6 +60,8 @@ class Preprocessor:
60
  self.model = DepthEstimator()
61
  elif name == "UPerNet":
62
  self.model = ImageSegmentor()
 
 
63
  else:
64
  raise ValueError
65
  torch.cuda.empty_cache()
@@ -86,3 +88,33 @@ class Preprocessor:
86
  image = resize_image(image, resolution=image_resolution)
87
  return PIL.Image.fromarray(image)
88
  return self.model(image, **kwargs)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
60
  self.model = DepthEstimator()
61
  elif name == "UPerNet":
62
  self.model = ImageSegmentor()
63
+ elif name == 'texnet':
64
+ self.model = TexnetPreprocessor()
65
  else:
66
  raise ValueError
67
  torch.cuda.empty_cache()
 
88
  image = resize_image(image, resolution=image_resolution)
89
  return PIL.Image.fromarray(image)
90
  return self.model(image, **kwargs)
91
+
92
+
93
+ # https://github.com/huggingface/controlnet_aux/blob/master/src/controlnet_aux/canny/__init__.py
94
+ class TexnetPreprocessor:
95
+ def __call__(self, input_image=None, low_threshold=100, high_threshold=200, image_resolution=512, output_type=None, **kwargs):
96
+ if "img" in kwargs:
97
+ warnings.warn("img is deprecated, please use `input_image=...` instead.", DeprecationWarning)
98
+ input_image = kwargs.pop("img")
99
+
100
+ if input_image is None:
101
+ raise ValueError("input_image must be defined.")
102
+
103
+ if not isinstance(input_image, np.ndarray):
104
+ input_image = np.array(input_image, dtype=np.uint8)
105
+ output_type = output_type or "pil"
106
+ else:
107
+ output_type = output_type or "np"
108
+
109
+ input_image = HWC3(input_image)
110
+ input_image = resize_image(input_image, image_resolution)
111
+ H, W, C = input_image.shape
112
+
113
+ # detected_map = cv2.resize(detected_map, (W, H), interpolation=cv2.INTER_LINEAR)
114
+ output_image = input_image.copy()
115
+
116
+ if output_type == "pil":
117
+ # detected_map = Image.fromarray(detected_map)
118
+ output_image = PIL.Image.fromarray(output_image)
119
+
120
+ return output_image
settings.py CHANGED
@@ -5,15 +5,17 @@ import numpy as np
5
  DEFAULT_MODEL_ID = os.getenv("DEFAULT_MODEL_ID", "stable-diffusion-v1-5/stable-diffusion-v1-5")
6
 
7
  MAX_NUM_IMAGES = int(os.getenv("MAX_NUM_IMAGES", "3"))
8
- DEFAULT_NUM_IMAGES = min(MAX_NUM_IMAGES, int(os.getenv("DEFAULT_NUM_IMAGES", "3")))
9
- MAX_IMAGE_RESOLUTION = int(os.getenv("MAX_IMAGE_RESOLUTION", "768"))
10
- DEFAULT_IMAGE_RESOLUTION = min(MAX_IMAGE_RESOLUTION, int(os.getenv("DEFAULT_IMAGE_RESOLUTION", "768")))
11
 
12
  ALLOW_CHANGING_BASE_MODEL = os.getenv("SPACE_ID") != "hysts/ControlNet-v1-1"
13
  SHOW_DUPLICATE_BUTTON = os.getenv("SHOW_DUPLICATE_BUTTON") == "1"
14
 
15
  MAX_SEED = np.iinfo(np.int32).max
16
 
 
 
17
  # setup CUDA
18
  # disable the following when deploying to Hugging Face
19
  # if os.getenv("CUDA_VISIBLE_DEVICES") is None:
 
5
  DEFAULT_MODEL_ID = os.getenv("DEFAULT_MODEL_ID", "stable-diffusion-v1-5/stable-diffusion-v1-5")
6
 
7
  MAX_NUM_IMAGES = int(os.getenv("MAX_NUM_IMAGES", "3"))
8
+ DEFAULT_NUM_IMAGES = min(MAX_NUM_IMAGES, int(os.getenv("DEFAULT_NUM_IMAGES", "2")))
9
+ MAX_IMAGE_RESOLUTION = int(os.getenv("MAX_IMAGE_RESOLUTION", "2048"))
10
+ DEFAULT_IMAGE_RESOLUTION = min(MAX_IMAGE_RESOLUTION, int(os.getenv("DEFAULT_IMAGE_RESOLUTION", "1024")))
11
 
12
  ALLOW_CHANGING_BASE_MODEL = os.getenv("SPACE_ID") != "hysts/ControlNet-v1-1"
13
  SHOW_DUPLICATE_BUTTON = os.getenv("SHOW_DUPLICATE_BUTTON") == "1"
14
 
15
  MAX_SEED = np.iinfo(np.int32).max
16
 
17
+ # Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.
18
+
19
  # setup CUDA
20
  # disable the following when deploying to Hugging Face
21
  # if os.getenv("CUDA_VISIBLE_DEVICES") is None: