coconutiscoding committed
Commit 5809e83 · 1 Parent(s): ed1cbdb

change model path

Files changed (2):
  1. app.py +174 -174
  2. requirements.txt +2 -1
app.py CHANGED
@@ -1,5 +1,108 @@
+import gradio as gr
+import torch
+import numpy as np
+from PIL import Image
+from torchvision.models.segmentation import deeplabv3_resnet50, DeepLabV3_ResNet50_Weights
+from huggingface_hub import hf_hub_download
+import cv2
+import zipfile
+
+
+
+# ---------------- Download and load the official LaMa weights ----------------
+repo_id = "JosephCatrambone/big-lama-torchscript"
+model_path = hf_hub_download(repo_id=repo_id, filename="big-lama.pt")
+# zip_path = hf_hub_download(repo_id="smartywu/big-lama", filename="big-lama.zip")
+# import zipfile
+# with zipfile.ZipFile(zip_path, 'r') as z:
+#     z.extractall("./")
+# model_path = "./models/best.ckpt"
+lama_model = torch.jit.load(model_path, map_location="cpu")
+lama_model.eval()
+
+print("torch:", torch.__version__)
+print("numpy:", np.__version__)
+
+# ---- Load the segmentation model (CPU) ----
+device = torch.device("cpu")
+weights = DeepLabV3_ResNet50_Weights.COCO_WITH_VOC_LABELS_V1
+model = deeplabv3_resnet50(weights=weights).to(device).eval()
+preprocess = weights.transforms()
+
+
+MAX_SIDE = 1024  # cap the longest side for speed and memory
+
+def _resize_if_needed(pil_img: Image.Image, max_side=MAX_SIDE) -> Image.Image:
+    w, h = pil_img.size
+    if max(w, h) <= max_side:
+        return pil_img
+    r = max_side / float(max(w, h))
+    return pil_img.resize((int(w * r), int(h * r)), Image.BILINEAR)
+
+def segment(image: Image.Image):
+    print("DEBUG: type(image) =", type(image), "mode=", getattr(image, "mode", None))
+    if not isinstance(image, Image.Image):
+        image = Image.fromarray(image)
+
+    image = image.convert("RGB")
+    image = _resize_if_needed(image)
+
+    # Preprocess and run inference
+    x = torch.from_numpy(np.array(image)).permute(2, 0, 1).float() / 255.0
+    x = x.unsqueeze(0).to(device)  # [1,3,H,W]
+
+    with torch.no_grad():
+        out = model(x)["out"][0]  # [C,H,W], C=21 (incl. background)
+    pred = out.argmax(0).cpu().numpy()  # [H,W]
+
+    # Foreground = non-background (class 0 is background for the COCO/VOC weights)
+    fg = (pred != 0).astype(np.uint8)
+
+    # ---------------- Mask dilation ----------------
+    kernel = np.ones((19, 19), np.uint8)
+    fg_dilated = cv2.dilate(fg, kernel, iterations=1)
+    print("added mask dilation step")
+
+    mask_img = Image.fromarray((fg_dilated * 255).astype(np.uint8), mode="L")
+
+    # Overlay a colored mask (semi-transparent red)
+    base = image.convert("RGBA")
+    overlay = Image.new("RGBA", base.size, (255, 0, 0, 0))
+    alpha = Image.fromarray((fg_dilated * 120).astype(np.uint8))
+    overlay.putalpha(alpha)
+    blended = Image.alpha_composite(base, overlay).convert("RGB")
+
+    # ---- LaMa erasing ----
+    img_np = np.array(image)      # HWC, uint8
+    mask_np = np.array(mask_img)  # H,W, 0/255
+    img_t = torch.from_numpy(img_np).permute(2, 0, 1).float().unsqueeze(0) / 255.0
+    mask_t = torch.from_numpy(mask_np).unsqueeze(0).unsqueeze(0).float() / 255.0
+    with torch.no_grad():
+        inpainted_t = lama_model(img_t, mask_t)  # [1,3,H,W]
+    inpainted_np = (inpainted_t[0].permute(1, 2, 0).numpy() * 255).astype(np.uint8)
+    inpainted_img = Image.fromarray(inpainted_np)
+
+    return blended, mask_img, inpainted_img
+
+# ---- Gradio UI ----
+demo = gr.Interface(
+    fn=segment,
+    inputs=gr.Image(type="pil", label="Upload Image"),
+    outputs=[
+        gr.Image(type="pil", label="Overlay (foreground)"),
+        gr.Image(type="pil", label="Binary Mask (foreground=white)"),
+        gr.Image(type="pil", label="Inpaint Result"),
+    ],
+    title="Semantic Segmentation + LaMa Inpainting",
+    description="DeepLabV3 segmentation + mask dilation + LaMa erasing, running on CPU."
+)
+
+if __name__ == "__main__":
+    demo.launch()
+
 # import gradio as gr
 # import torch
+# import torch.nn as nn
 # import numpy as np
 # from PIL import Image
 # from torchvision.models.segmentation import deeplabv3_resnet50, DeepLabV3_ResNet50_Weights
@@ -10,16 +113,82 @@
 
 
 # # ---------------- Download and load the official LaMa weights ----------------
-# # repo_id = "saic-mdal/lama-big"
-# # model_path = hf_hub_download(repo_id=repo_id, filename="big-lama.pt")
 # zip_path = hf_hub_download(repo_id="smartywu/big-lama", filename="big-lama.zip")
-# import zipfile
 # with zipfile.ZipFile(zip_path, 'r') as z:
 #     z.extractall("./")
-# model_path = "./models/best.ckpt"
-# lama_model = torch.jit.load(model_path, map_location="cpu")
+# model_path = "./big-lama/models/best.ckpt"
+
+
+# # ==========================================================
+# # LaMa FBAResUNet definition (official structure)
+# # ==========================================================
+# class GatedConv(nn.Module):
+#     def __init__(self, in_channels, out_channels, kernel_size=3, stride=1, dilation=1):
+#         super().__init__()
+#         padding = (kernel_size - 1) // 2 * dilation
+#         self.conv = nn.Conv2d(in_channels, out_channels, kernel_size, stride,
+#                               padding=padding, dilation=dilation)
+#         self.mask_conv = nn.Conv2d(in_channels, out_channels, kernel_size, stride,
+#                                    padding=padding, dilation=dilation)
+#         self.sigmoid = nn.Sigmoid()
+
+#     def forward(self, x):
+#         feat = self.conv(x)
+#         mask = self.sigmoid(self.mask_conv(x))
+#         return feat * mask
+
+
+# class FBAResUNet(nn.Module):
+#     def __init__(self, input_channels=4, output_channels=3, num_filters=64):
+#         super().__init__()
+#         self.enc1 = GatedConv(input_channels, num_filters)
+#         self.enc2 = GatedConv(num_filters, num_filters * 2, stride=2)
+#         self.enc3 = GatedConv(num_filters * 2, num_filters * 4, stride=2)
+#         self.enc4 = GatedConv(num_filters * 4, num_filters * 8, stride=2)
+
+#         self.middle = GatedConv(num_filters * 8, num_filters * 8)
+
+#         self.dec4 = nn.ConvTranspose2d(num_filters * 8, num_filters * 4, kernel_size=4, stride=2, padding=1)
+#         self.dec3 = nn.ConvTranspose2d(num_filters * 8, num_filters * 2, kernel_size=4, stride=2, padding=1)
+#         self.dec2 = nn.ConvTranspose2d(num_filters * 4, num_filters, kernel_size=4, stride=2, padding=1)
+#         self.dec1 = nn.Conv2d(num_filters * 2, output_channels, kernel_size=3, padding=1)
+
+#         self.relu = nn.ReLU(inplace=True)
+
+#     def forward(self, image, mask):
+#         # image: [B,3,H,W], mask: [B,1,H,W]
+#         x = torch.cat([image, mask], dim=1)  # -> [B,4,H,W]
+
+#         e1 = self.enc1(x)
+#         e2 = self.enc2(self.relu(e1))
+#         e3 = self.enc3(self.relu(e2))
+#         e4 = self.enc4(self.relu(e3))
+
+#         m = self.middle(self.relu(e4))
+
+#         d4 = self.relu(self.dec4(m))
+#         d4 = torch.cat([d4, e3], dim=1)
+#         d3 = self.relu(self.dec3(d4))
+#         d3 = torch.cat([d3, e2], dim=1)
+#         d2 = self.relu(self.dec2(d3))
+#         d2 = torch.cat([d2, e1], dim=1)
+#         out = torch.sigmoid(self.dec1(d2))
+#         return out
+
+
+# # ==========================================================
+# # Load the pretrained LaMa weights
+# # ==========================================================
+# checkpoint = torch.load(model_path, map_location="cpu")
+# lama_model = FBAResUNet()
+# if "state_dict" in checkpoint:
+#     state_dict = {k.replace("netG.", ""): v for k, v in checkpoint["state_dict"].items()}
+# else:
+#     state_dict = checkpoint
+# lama_model.load_state_dict(state_dict, strict=False)
 # lama_model.eval()
 
+
 # print("torch:", torch.__version__)
 # print("numpy:", np.__version__)
 
@@ -99,172 +268,3 @@
 
 # if __name__ == "__main__":
 #     demo.launch()
-
-import gradio as gr
-import torch
-import torch.nn as nn
-import numpy as np
-from PIL import Image
-from torchvision.models.segmentation import deeplabv3_resnet50, DeepLabV3_ResNet50_Weights
-from huggingface_hub import hf_hub_download
-import cv2
-import zipfile
-
-
-
-# ---------------- Download and load the official LaMa weights ----------------
-zip_path = hf_hub_download(repo_id="smartywu/big-lama", filename="big-lama.zip")
-with zipfile.ZipFile(zip_path, 'r') as z:
-    z.extractall("./")
-model_path = "./big-lama/models/best.ckpt"
-
-
-# ==========================================================
-# LaMa FBAResUNet definition (official structure)
-# ==========================================================
-class GatedConv(nn.Module):
-    def __init__(self, in_channels, out_channels, kernel_size=3, stride=1, dilation=1):
-        super().__init__()
-        padding = (kernel_size - 1) // 2 * dilation
-        self.conv = nn.Conv2d(in_channels, out_channels, kernel_size, stride,
-                              padding=padding, dilation=dilation)
-        self.mask_conv = nn.Conv2d(in_channels, out_channels, kernel_size, stride,
-                                   padding=padding, dilation=dilation)
-        self.sigmoid = nn.Sigmoid()
-
-    def forward(self, x):
-        feat = self.conv(x)
-        mask = self.sigmoid(self.mask_conv(x))
-        return feat * mask
-
-
-class FBAResUNet(nn.Module):
-    def __init__(self, input_channels=4, output_channels=3, num_filters=64):
-        super().__init__()
-        self.enc1 = GatedConv(input_channels, num_filters)
-        self.enc2 = GatedConv(num_filters, num_filters * 2, stride=2)
-        self.enc3 = GatedConv(num_filters * 2, num_filters * 4, stride=2)
-        self.enc4 = GatedConv(num_filters * 4, num_filters * 8, stride=2)
-
-        self.middle = GatedConv(num_filters * 8, num_filters * 8)
-
-        self.dec4 = nn.ConvTranspose2d(num_filters * 8, num_filters * 4, kernel_size=4, stride=2, padding=1)
-        self.dec3 = nn.ConvTranspose2d(num_filters * 8, num_filters * 2, kernel_size=4, stride=2, padding=1)
-        self.dec2 = nn.ConvTranspose2d(num_filters * 4, num_filters, kernel_size=4, stride=2, padding=1)
-        self.dec1 = nn.Conv2d(num_filters * 2, output_channels, kernel_size=3, padding=1)
-
-        self.relu = nn.ReLU(inplace=True)
-
-    def forward(self, image, mask):
-        # image: [B,3,H,W], mask: [B,1,H,W]
-        x = torch.cat([image, mask], dim=1)  # -> [B,4,H,W]
-
-        e1 = self.enc1(x)
-        e2 = self.enc2(self.relu(e1))
-        e3 = self.enc3(self.relu(e2))
-        e4 = self.enc4(self.relu(e3))
-
-        m = self.middle(self.relu(e4))
-
-        d4 = self.relu(self.dec4(m))
-        d4 = torch.cat([d4, e3], dim=1)
-        d3 = self.relu(self.dec3(d4))
-        d3 = torch.cat([d3, e2], dim=1)
-        d2 = self.relu(self.dec2(d3))
-        d2 = torch.cat([d2, e1], dim=1)
-        out = torch.sigmoid(self.dec1(d2))
-        return out
-
-
-# ==========================================================
-# Load the pretrained LaMa weights
-# ==========================================================
-checkpoint = torch.load(model_path, map_location="cpu")
-lama_model = FBAResUNet()
-if "state_dict" in checkpoint:
-    state_dict = {k.replace("netG.", ""): v for k, v in checkpoint["state_dict"].items()}
-else:
-    state_dict = checkpoint
-lama_model.load_state_dict(state_dict, strict=False)
-lama_model.eval()
-
-
-print("torch:", torch.__version__)
-print("numpy:", np.__version__)
-
-# ---- Load the segmentation model (CPU) ----
-device = torch.device("cpu")
-weights = DeepLabV3_ResNet50_Weights.COCO_WITH_VOC_LABELS_V1
-model = deeplabv3_resnet50(weights=weights).to(device).eval()
-preprocess = weights.transforms()
-
-
-MAX_SIDE = 1024  # cap the longest side for speed and memory
-
-def _resize_if_needed(pil_img: Image.Image, max_side=MAX_SIDE) -> Image.Image:
-    w, h = pil_img.size
-    if max(w, h) <= max_side:
-        return pil_img
-    r = max_side / float(max(w, h))
-    return pil_img.resize((int(w * r), int(h * r)), Image.BILINEAR)
-
-def segment(image: Image.Image):
-    print("DEBUG: type(image) =", type(image), "mode=", getattr(image, "mode", None))
-    if not isinstance(image, Image.Image):
-        image = Image.fromarray(image)
-
-    image = image.convert("RGB")
-    image = _resize_if_needed(image)
-
-    # Preprocess and run inference
-    x = torch.from_numpy(np.array(image)).permute(2, 0, 1).float() / 255.0
-    x = x.unsqueeze(0).to(device)  # [1,3,H,W]
-
-    with torch.no_grad():
-        out = model(x)["out"][0]  # [C,H,W], C=21 (incl. background)
-    pred = out.argmax(0).cpu().numpy()  # [H,W]
-
-    # Foreground = non-background (class 0 is background for the COCO/VOC weights)
-    fg = (pred != 0).astype(np.uint8)
-
-    # ---------------- Mask dilation ----------------
-    kernel = np.ones((19, 19), np.uint8)
-    fg_dilated = cv2.dilate(fg, kernel, iterations=1)
-    print("added mask dilation step")
-
-    mask_img = Image.fromarray((fg_dilated * 255).astype(np.uint8), mode="L")
-
-    # Overlay a colored mask (semi-transparent red)
-    base = image.convert("RGBA")
-    overlay = Image.new("RGBA", base.size, (255, 0, 0, 0))
-    alpha = Image.fromarray((fg_dilated * 120).astype(np.uint8))
-    overlay.putalpha(alpha)
-    blended = Image.alpha_composite(base, overlay).convert("RGB")
-
-    # ---- LaMa erasing ----
-    img_np = np.array(image)      # HWC, uint8
-    mask_np = np.array(mask_img)  # H,W, 0/255
-    img_t = torch.from_numpy(img_np).permute(2, 0, 1).float().unsqueeze(0) / 255.0
-    mask_t = torch.from_numpy(mask_np).unsqueeze(0).unsqueeze(0).float() / 255.0
-    with torch.no_grad():
-        inpainted_t = lama_model(img_t, mask_t)  # [1,3,H,W]
-    inpainted_np = (inpainted_t[0].permute(1, 2, 0).numpy() * 255).astype(np.uint8)
-    inpainted_img = Image.fromarray(inpainted_np)
-
-    return blended, mask_img, inpainted_img
-
-# ---- Gradio UI ----
-demo = gr.Interface(
-    fn=segment,
-    inputs=gr.Image(type="pil", label="Upload Image"),
-    outputs=[
-        gr.Image(type="pil", label="Overlay (foreground)"),
-        gr.Image(type="pil", label="Binary Mask (foreground=white)"),
-        gr.Image(type="pil", label="Inpaint Result"),
-    ],
-    title="Semantic Segmentation + LaMa Inpainting",
-    description="DeepLabV3 segmentation + mask dilation + LaMa erasing, running on CPU."
-)
-
-if __name__ == "__main__":
-    demo.launch()
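The new code hands the `_resize_if_needed` output straight to the TorchScript model, so height and width can be arbitrary. LaMa's official inference configs pad inputs to a multiple of 8, and the circulated big-lama TorchScript export is generally reported to expect the same; if inference errors or border artifacts appear at odd sizes, a reflect-pad-then-crop wrapper is a common workaround. A minimal sketch, not part of this commit (`pad_to_multiple` is a hypothetical helper):

import torch
import torch.nn.functional as F

def pad_to_multiple(img_t: torch.Tensor, mask_t: torch.Tensor, mod: int = 8):
    """Reflect-pad a [1,3,H,W] image and [1,1,H,W] mask so H and W are divisible by `mod`."""
    _, _, h, w = img_t.shape
    pad_h = (mod - h % mod) % mod
    pad_w = (mod - w % mod) % mod
    # F.pad takes (left, right, top, bottom) for the last two dims
    img_p = F.pad(img_t, (0, pad_w, 0, pad_h), mode="reflect")
    mask_p = F.pad(mask_t, (0, pad_w, 0, pad_h), mode="reflect")
    return img_p, mask_p, (h, w)

# Inside segment(), before the model call:
#   img_t, mask_t, (h, w) = pad_to_multiple(img_t, mask_t)
#   inpainted_t = lama_model(img_t, mask_t)[:, :, :h, :w]  # crop back to the original size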
requirements.txt CHANGED
@@ -16,5 +16,6 @@ opencv-python
 gradio>=4.0.0
 
 # ---- needed later for LaMa inpainting ----
-pytorch-lightning
+# pytorch-lightning
+# omegaconf
 huggingface_hub
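Because the TorchScript checkpoint carries its own serialized graph, `torch.jit.load` needs neither pytorch-lightning nor omegaconf to rebuild the model class, which is presumably why both are commented out above. A quick smoke test under the two files in this commit; the dummy 512×512 input and the expected output shape are assumptions, not guarantees:

from huggingface_hub import hf_hub_download
import torch

path = hf_hub_download("JosephCatrambone/big-lama-torchscript", "big-lama.pt")
lama = torch.jit.load(path, map_location="cpu").eval()

img = torch.rand(1, 3, 512, 512)                    # dummy RGB in [0, 1]
mask = (torch.rand(1, 1, 512, 512) > 0.95).float()  # sparse binary mask
with torch.no_grad():
    out = lama(img, mask)                           # same call signature app.py uses
print(out.shape)  # expect [1, 3, 512, 512] if the export preserves input size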