Spaces: Running on T4

Commit 9d36776 · Parent(s): d9c3123

update

Files changed:
- .gitignore +2 -1
- app.py +7 -5
- app_texnet.py +13 -10
- model.py +94 -4
- preprocessor.py +32 -0
- settings.py +5 -3
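In short: this commit repurposes the hysts ControlNet-v1-1 demo for material authoring. app.py swaps the Canny tab for Texnet/Matnet tabs backed by a new "texnet" task; app_texnet.py drops the Canny threshold sliders and adds a gr.Examples entry; model.py registers the jingyangcarl/texnet ControlNet checkpoint and adds a coarse-to-fine process_texnet pipeline that refines ControlNet output with a Stable Diffusion img2img pass; preprocessor.py adds a passthrough TexnetPreprocessor; settings.py raises the resolution limits; and .gitignore now ignores the local data directory.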
.gitignore
CHANGED

@@ -1 +1,2 @@
-__pycache__
+__pycache__
+data
app.py
CHANGED

@@ -4,6 +4,7 @@ import gradio as gr
 import torch
 
 from app_canny import create_demo as create_demo_canny
+from app_texnet import create_demo as create_demo_texnet
 
 from model import Model
 from settings import ALLOW_CHANGING_BASE_MODEL, DEFAULT_MODEL_ID, SHOW_DUPLICATE_BUTTON
@@ -13,7 +14,8 @@ DESCRIPTION = "# Material Authoring Demo v0.1. Under Construction"
 if not torch.cuda.is_available():
     DESCRIPTION += "\n<p>Running on CPU 🥶 This demo does not work on CPU.</p>"
 
-model = Model(base_model_id=DEFAULT_MODEL_ID, task_name="Canny")
+# model = Model(base_model_id=DEFAULT_MODEL_ID, task_name="Canny")
+model = Model(base_model_id=DEFAULT_MODEL_ID, task_name="texnet")
 
 with gr.Blocks() as demo:
     gr.Markdown(DESCRIPTION)
@@ -24,12 +26,12 @@ with gr.Blocks() as demo:
     )
 
     with gr.Tabs():
-        with gr.Tab("Canny"):
-            create_demo_canny(model.process_canny)
+        # with gr.Tab("Canny"):
+        #     create_demo_canny(model.process_canny)
         with gr.Tab("Texnet"):
-
+            create_demo_texnet(model.process_texnet)
         with gr.Tab("Matnet"):
-
+            create_demo_texnet(model.process_texnet)
 
     with gr.Accordion(label="Base model", open=False):
         with gr.Row():
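Note: both the Texnet and Matnet tabs are wired to the same handler, create_demo_texnet(model.process_texnet); a Matnet-specific demo presumably comes later, consistent with the "Under Construction" description.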
app_texnet.py
CHANGED

@@ -29,12 +29,6 @@ def create_demo(process):
                     value=DEFAULT_IMAGE_RESOLUTION,
                     step=256,
                 )
-                canny_low_threshold = gr.Slider(
-                    label="Canny low threshold", minimum=1, maximum=255, value=100, step=1
-                )
-                canny_high_threshold = gr.Slider(
-                    label="Canny high threshold", minimum=1, maximum=255, value=200, step=1
-                )
                 num_steps = gr.Slider(label="Number of steps", minimum=1, maximum=100, value=20, step=1)
                 guidance_scale = gr.Slider(label="Guidance scale", minimum=0.1, maximum=30.0, value=9.0, step=0.1)
                 seed = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=0)
@@ -56,8 +50,6 @@ def create_demo(process):
            num_steps,
            guidance_scale,
            seed,
-           canny_low_threshold,
-           canny_high_threshold,
        ]
        prompt.submit(
            fn=randomize_seed_fn,
@@ -72,12 +64,23 @@ def create_demo(process):
            api_name="canny",
            concurrency_id="main",
        )
+
+        # TODO: setup several example images
+        gr.Examples(
+            fn=process,
+            inputs=inputs,
+            outputs=result,
+            examples=[
+                # ["/dgxusers/Users/jyang/project/ObjectReal/data/control/preprocess/bunny/fused/uv_normal.png", "feather"],
+                ["./data/uv_normal.png", "feather"],
+            ],
+        )
     return demo
 
 
 if __name__ == "__main__":
     from model import Model
 
-    model = Model(task_name="
-    demo = create_demo(model.
+    model = Model(task_name="Texnet")
+    demo = create_demo(model.process_texnet)
     demo.queue().launch()
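Note: the new gr.Examples block points at ./data/uv_normal.png, which lives under the data directory that this same commit adds to .gitignore, so the example image has to be provisioned on the Space separately rather than shipped in the repo. The commented-out /dgxusers/... entry is the developer's local original of the same file. The two removed lines in the __main__ block were cut off in the source and are left truncated here.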
model.py
CHANGED

@@ -1,24 +1,29 @@
 import gc
 
+# get socket and check if the name is vgldgx01
+import socket
+if socket.gethostname() != "vgldgx01":
+    import spaces  # [uncomment to use ZeroGPU]
+
 import numpy as np
 import PIL.Image
 import torch
 from controlnet_aux.util import HWC3
-import spaces  # [uncomment to use ZeroGPU]
 from diffusers import (
     ControlNetModel,
     DiffusionPipeline,
     StableDiffusionControlNetPipeline,
+    StableDiffusionImg2ImgPipeline,
     UniPCMultistepScheduler,
 )
-
+from torchvision import transforms
 from cv_utils import resize_image
 from preprocessor import Preprocessor
 from settings import MAX_IMAGE_RESOLUTION, MAX_NUM_IMAGES
 
 CONTROLNET_MODEL_IDS = {
     # "Openpose": "lllyasviel/control_v11p_sd15_openpose",
-    "Canny": "lllyasviel/control_v11p_sd15_canny",
+    # "Canny": "lllyasviel/control_v11p_sd15_canny",
     # "MLSD": "lllyasviel/control_v11p_sd15_mlsd",
     # "scribble": "lllyasviel/control_v11p_sd15_scribble",
     # "softedge": "lllyasviel/control_v11p_sd15_softedge",
@@ -30,6 +35,8 @@ CONTROLNET_MODEL_IDS = {
     # "shuffle": "lllyasviel/control_v11e_sd15_shuffle",
     # "ip2p": "lllyasviel/control_v11e_sd15_ip2p",
     # "inpaint": "lllyasviel/control_v11e_sd15_inpaint",
+    # "texnet": "/home/jyang/projects/ObjectReal/logs/train_texnet_deploy/checkpoint-55000/controlnet",  # load and call
+    "texnet": "jingyangcarl/texnet",
 }
 
 
@@ -46,6 +53,9 @@ class Model:
         self.base_model_id = ""
         self.task_name = ""
         self.pipe = self.load_pipe(base_model_id, task_name)
+        self.pipe_base = StableDiffusionImg2ImgPipeline.from_pretrained(
+            "runwayml/stable-diffusion-v1-5", safety_checker=None, torch_dtype=torch.float16
+        ).to(self.device)
         self.preprocessor = Preprocessor()
 
     def load_pipe(self, base_model_id: str, task_name: str) -> DiffusionPipeline:
@@ -58,6 +68,14 @@ class Model:
             return self.pipe
         model_id = CONTROLNET_MODEL_IDS[task_name]
         controlnet = ControlNetModel.from_pretrained(model_id, torch_dtype=torch.float16)
+        to_upload = False
+        if to_upload:
+            # confirm before uploading
+            confirm = input(f"Do you want to upload {model_id} to the hub? (y/n): ")
+            if confirm.lower() == "y":
+                controlnet.push_to_hub("jingyangcarl/texnet")
+            else:
+                print("Upload cancelled.")
         pipe = StableDiffusionControlNetPipeline.from_pretrained(
             base_model_id, safety_checker=None, controlnet=controlnet, torch_dtype=torch.float16
         )
@@ -132,7 +150,79 @@ class Model:
             image=control_image,
         ).images
 
-    @spaces.GPU  # [uncomment to use ZeroGPU]
+    # @spaces.GPU  # [uncomment to use ZeroGPU]
+    @torch.inference_mode()
+    def process_texnet(
+        self,
+        image: np.ndarray,
+        prompt: str,
+        additional_prompt: str,
+        negative_prompt: str,
+        num_images: int,
+        image_resolution: int,
+        num_steps: int,
+        guidance_scale: float,
+        seed: int,
+        low_threshold: int,
+        high_threshold: int,
+    ) -> list[PIL.Image.Image]:
+        if image is None:
+            raise ValueError
+        if image_resolution > MAX_IMAGE_RESOLUTION:
+            raise ValueError
+        if num_images > MAX_NUM_IMAGES:
+            raise ValueError
+
+        self.preprocessor.load("texnet")
+        control_image = self.preprocessor(
+            image=image, low_threshold=low_threshold, high_threshold=high_threshold, image_resolution=image_resolution, output_type="pil"
+        )
+
+        self.load_controlnet_weight("texnet")
+        results_coarse = self.run_pipe(
+            prompt=self.get_prompt(prompt, additional_prompt),
+            negative_prompt=negative_prompt,
+            control_image=control_image,
+            num_images=num_images,
+            num_steps=num_steps,
+            guidance_scale=guidance_scale,
+            seed=seed,
+        )
+
+        # use img2img pipeline
+        self.pipe_backup = self.pipe
+        self.pipe = self.pipe_base
+
+        # refine
+        results_fine = []
+        for result_coarse in results_coarse:
+            # clean up GPU cache
+            torch.cuda.empty_cache()
+            gc.collect()
+
+            # masking
+            mask = (np.array(control_image).sum(axis=-1) == 0)[..., None]
+            image_masked = PIL.Image.fromarray(np.where(mask, control_image, result_coarse))
+            image_blurry = transforms.GaussianBlur(kernel_size=5, sigma=1)(image_masked)
+            result_fine = self.run_pipe(
+                # prompt=prompt,
+                prompt=self.get_prompt(prompt, additional_prompt),
+                negative_prompt=negative_prompt,
+                control_image=image_blurry,
+                num_images=1,
+                num_steps=num_steps,
+                guidance_scale=guidance_scale,
+                seed=seed,
+            )[0]
+            result_fine = PIL.Image.fromarray(np.where(mask, control_image, result_fine))
+            results_fine.append(result_fine)
+
+        # restore the original pipe
+        self.pipe = self.pipe_backup
+
+        return [control_image, *results_fine, control_image, *results_coarse]
+
+    # @spaces.GPU  # [uncomment to use ZeroGPU]
     @torch.inference_mode()
     def process_canny(
         self,
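Two things worth noting in process_texnet: the hostname guard at the top of the file skips the ZeroGPU dependency on the developer's vgldgx01 machine, and the method still declares low_threshold/high_threshold parameters even though app_texnet.py no longer includes threshold sliders in its inputs list, so as wired the Gradio callback appears to be invoked with fewer arguments than the signature requires. For clarity, here is a minimal, self-contained sketch of the masking-and-blur step that precedes the img2img refinement pass; the toy images are hypothetical stand-ins for the demo's control map and coarse ControlNet output:

import numpy as np
import PIL.Image
from torchvision import transforms

# Hypothetical stand-ins: a control map that is black outside the UV island,
# and a flat gray "coarse" result from the ControlNet pass.
control_image = PIL.Image.fromarray(np.zeros((64, 64, 3), dtype=np.uint8))
result_coarse = PIL.Image.fromarray(np.full((64, 64, 3), 200, dtype=np.uint8))

# Background mask: pixels where the control map is pure black.
mask = (np.array(control_image).sum(axis=-1) == 0)[..., None]

# Composite: keep the control map in the background, the coarse result elsewhere.
image_masked = PIL.Image.fromarray(
    np.where(mask, np.array(control_image), np.array(result_coarse))
)

# Slightly blur the composite before handing it to the img2img refinement pass.
image_blurry = transforms.GaussianBlur(kernel_size=5, sigma=1)(image_masked)
print(image_blurry.size)  # (64, 64)

The effect is that pixels outside the unwrapped texture region are copied through unchanged, so only the textured area is refined.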
preprocessor.py
CHANGED

@@ -60,6 +60,8 @@ class Preprocessor:
             self.model = DepthEstimator()
         elif name == "UPerNet":
             self.model = ImageSegmentor()
+        elif name == "texnet":
+            self.model = TexnetPreprocessor()
         else:
             raise ValueError
         torch.cuda.empty_cache()
@@ -86,3 +88,33 @@ class Preprocessor:
             image = resize_image(image, resolution=image_resolution)
             return PIL.Image.fromarray(image)
         return self.model(image, **kwargs)
+
+
+# https://github.com/huggingface/controlnet_aux/blob/master/src/controlnet_aux/canny/__init__.py
+class TexnetPreprocessor:
+    def __call__(self, input_image=None, low_threshold=100, high_threshold=200, image_resolution=512, output_type=None, **kwargs):
+        if "img" in kwargs:
+            warnings.warn("img is deprecated, please use `input_image=...` instead.", DeprecationWarning)
+            input_image = kwargs.pop("img")
+
+        if input_image is None:
+            raise ValueError("input_image must be defined.")
+
+        if not isinstance(input_image, np.ndarray):
+            input_image = np.array(input_image, dtype=np.uint8)
+            output_type = output_type or "pil"
+        else:
+            output_type = output_type or "np"
+
+        input_image = HWC3(input_image)
+        input_image = resize_image(input_image, image_resolution)
+        H, W, C = input_image.shape
+
+        # detected_map = cv2.resize(detected_map, (W, H), interpolation=cv2.INTER_LINEAR)
+        output_image = input_image.copy()
+
+        if output_type == "pil":
+            # detected_map = Image.fromarray(detected_map)
+            output_image = PIL.Image.fromarray(output_image)
+
+        return output_image
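TexnetPreprocessor is effectively a passthrough: it normalizes the input to 3-channel uint8 (HWC3), resizes it, and returns a copy, leaving the Canny-style threshold arguments unused. A usage sketch against the demo's own modules (assuming preprocessor.py already imports warnings, HWC3, and resize_image at module level, which this diff does not add):

import numpy as np
from preprocessor import Preprocessor  # the demo's own module

preprocessor = Preprocessor()
preprocessor.load("texnet")  # selects the new TexnetPreprocessor

# Hypothetical 512x512 UV-normal map standing in for ./data/uv_normal.png.
uv_normal = np.random.randint(0, 256, (512, 512, 3), dtype=np.uint8)

# Mirrors the call in Model.process_texnet; the thresholds are accepted but ignored.
control_image = preprocessor(
    image=uv_normal, low_threshold=100, high_threshold=200,
    image_resolution=512, output_type="pil",
)
print(control_image.size)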
settings.py
CHANGED

@@ -5,15 +5,17 @@ import numpy as np
 DEFAULT_MODEL_ID = os.getenv("DEFAULT_MODEL_ID", "stable-diffusion-v1-5/stable-diffusion-v1-5")
 
 MAX_NUM_IMAGES = int(os.getenv("MAX_NUM_IMAGES", "3"))
-DEFAULT_NUM_IMAGES = min(MAX_NUM_IMAGES, int(os.getenv("DEFAULT_NUM_IMAGES", "
-MAX_IMAGE_RESOLUTION = int(os.getenv("MAX_IMAGE_RESOLUTION", "
-DEFAULT_IMAGE_RESOLUTION = min(MAX_IMAGE_RESOLUTION, int(os.getenv("DEFAULT_IMAGE_RESOLUTION", "
+DEFAULT_NUM_IMAGES = min(MAX_NUM_IMAGES, int(os.getenv("DEFAULT_NUM_IMAGES", "2")))
+MAX_IMAGE_RESOLUTION = int(os.getenv("MAX_IMAGE_RESOLUTION", "2048"))
+DEFAULT_IMAGE_RESOLUTION = min(MAX_IMAGE_RESOLUTION, int(os.getenv("DEFAULT_IMAGE_RESOLUTION", "1024")))
 
 ALLOW_CHANGING_BASE_MODEL = os.getenv("SPACE_ID") != "hysts/ControlNet-v1-1"
 SHOW_DUPLICATE_BUTTON = os.getenv("SHOW_DUPLICATE_BUTTON") == "1"
 
 MAX_SEED = np.iinfo(np.int32).max
 
+# Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.
+
 # setup CUDA
 # disable the following when deployting to hugging face
 # if os.getenv("CUDA_VISIBLE_DEVICES") is None:
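The removed lines' old default values were cut off in the source and are left truncated above. The min() clamps keep env-var overrides from exceeding the hard limits; a small sketch of how the new defaults resolve under a hypothetical override:

import os

# Hypothetical override, e.g. set in the Space's environment settings.
os.environ["DEFAULT_IMAGE_RESOLUTION"] = "4096"

MAX_IMAGE_RESOLUTION = int(os.getenv("MAX_IMAGE_RESOLUTION", "2048"))
DEFAULT_IMAGE_RESOLUTION = min(MAX_IMAGE_RESOLUTION, int(os.getenv("DEFAULT_IMAGE_RESOLUTION", "1024")))

# The requested 4096 is clamped to the 2048 ceiling.
assert DEFAULT_IMAGE_RESOLUTION == 2048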