segment-anything-webgpu

Running

App Files Files Community

sezer91 committed on Apr 21

Commit

ee7b9e5

1 Parent(s): 274e615

d

Browse files

Files changed (1) hide show

index.js +169 -201

index.js CHANGED Viewed

@@ -14,8 +14,6 @@ const uploadButton = document.getElementById("upload-button");
 const resetButton = document.getElementById("reset-image");
 const clearButton = document.getElementById("clear-points");
 const cutButton = document.getElementById("cut-mask");
-const starIcon = document.getElementById("star-icon");
-const crossIcon = document.getElementById("cross-icon");
 const maskCanvas = document.getElementById("mask-output");
 const maskContext = maskCanvas.getContext("2d");
@@ -24,161 +22,189 @@ const EXAMPLE_URL =
 // State variables
 let isEncoding = false;
-let isDecoding = false;
-let decodePending = false;
-let lastPoints = null;
-let isMultiMaskMode = false;
 let imageInput = null;
 let imageProcessed = null;
 let imageEmbeddings = null;
-async function decode() {
-  // Only proceed if we are not already decoding
-  if (isDecoding) {
-    decodePending = true;
     return;
   }
-  isDecoding = true;
-  // Prepare inputs for decoding
   const reshaped = imageProcessed.reshaped_input_sizes[0];
-  const points = lastPoints
-    .map((x) => [x.position[0] * reshaped[1], x.position[1] * reshaped[0]])
-    .flat(Infinity);
-  const labels = lastPoints.map((x) => BigInt(x.label)).flat(Infinity);
-  const num_points = lastPoints.length;
-  const input_points = new Tensor("float32", points, [1, 1, num_points, 2]);
-  const input_labels = new Tensor("int64", labels, [1, 1, num_points]);
-  // Generate the mask
-  const { pred_masks, iou_scores } = await model({
-    ...imageEmbeddings,
-    input_points,
-    input_labels,
-  });
-  // Post-process the mask
-  const masks = await processor.post_process_masks(
-    pred_masks,
-    imageProcessed.original_sizes,
-    imageProcessed.reshaped_input_sizes,
-  );
-  isDecoding = false;
-  updateMaskOverlay(RawImage.fromTensor(masks[0][0]), iou_scores.data);
-  // Check if another decode is pending
-  if (decodePending) {
-    decodePending = false;
-    decode();
   }
-}
-function updateMaskOverlay(mask, scores) {
-  // Update canvas dimensions (if different)
-  if (maskCanvas.width !== mask.width || maskCanvas.height !== mask.height) {
-    maskCanvas.width = mask.width;
-    maskCanvas.height = mask.height;
   }
-  // Allocate buffer for pixel data
-  const imageData = maskContext.createImageData(
-    maskCanvas.width,
-    maskCanvas.height,
-  );
-  // Select best mask
-  const numMasks = scores.length; // 3
-  let bestIndex = 0;
-  for (let i = 1; i < numMasks; ++i) {
-    if (scores[i] > scores[bestIndex]) {
-      bestIndex = i;
     }
-  }
-  statusLabel.textContent = `Segment score: ${scores[bestIndex].toFixed(2)}`;
-  // Fill mask with colour
-  const pixelData = imageData.data;
-  for (let i = 0; i < pixelData.length; ++i) {
-    if (mask.data[numMasks * i + bestIndex] === 1) {
-      const offset = 4 * i;
-      pixelData[offset] = 0; // red
-      pixelData[offset + 1] = 114; // green
-      pixelData[offset + 2] = 189; // blue
-      pixelData[offset + 3] = 255; // alpha
     }
   }
-  // Draw image data to context
-  maskContext.putImageData(imageData, 0, 0);
-}
-function clearPointsAndMask() {
-  // Reset state
-  isMultiMaskMode = false;
-  lastPoints = null;
-  // Remove points from previous mask (if any)
-  document.querySelectorAll(".icon").forEach((e) => e.remove());
-  // Disable cut button
-  cutButton.disabled = true;
-  // Reset mask canvas
-  maskContext.clearRect(0, 0, maskCanvas.width, maskCanvas.height);
 }
-clearButton.addEventListener("click", clearPointsAndMask);
-resetButton.addEventListener("click", () => {
-  // Reset the state
-  imageInput = null;
-  imageProcessed = null;
-  imageEmbeddings = null;
-  isEncoding = false;
-  isDecoding = false;
-  // Clear points and mask (if present)
-  clearPointsAndMask();
-  // Update UI
-  cutButton.disabled = true;
-  imageContainer.style.backgroundImage = "none";
-  uploadButton.style.display = "flex";
-  statusLabel.textContent = "Ready";
-});
-async function encode(url) {
-  if (isEncoding) return;
-  isEncoding = true;
-  statusLabel.textContent = "Extracting image embedding...";
-  imageInput = await RawImage.fromURL(url);
-  // Update UI
-  imageContainer.style.backgroundImage = `url(${url})`;
-  uploadButton.style.display = "none";
-  cutButton.disabled = true;
-  // Recompute image embeddings
-  imageProcessed = await processor(imageInput);
-  imageEmbeddings = await model.get_image_embeddings(imageProcessed);
-  statusLabel.textContent = "Embedding extracted!";
-  isEncoding = false;
 }
-// Handle file selection
 fileUpload.addEventListener("change", function (e) {
   const file = e.target.files[0];
   if (!file) return;
   const reader = new FileReader();
-  // Set up a callback when the file is loaded
   reader.onload = (e2) => encode(e2.target.result);
   reader.readAsDataURL(file);
 });
@@ -187,90 +213,32 @@ example.addEventListener("click", (e) => {
   encode(EXAMPLE_URL);
 });
-// Attach hover event to image container
-imageContainer.addEventListener("mousedown", (e) => {
-  if (e.button !== 0 && e.button !== 2) {
-    return; // Ignore other buttons
-  }
-  if (!imageEmbeddings) {
-    return; // Ignore if not encoded yet
-  }
-  if (!isMultiMaskMode) {
-    lastPoints = [];
-    isMultiMaskMode = true;
-    cutButton.disabled = false;
-  }
-  const point = getPoint(e);
-  lastPoints.push(point);
-  // add icon
-  const icon = (point.label === 1 ? starIcon : crossIcon).cloneNode();
-  icon.style.left = `${point.position[0] * 100}%`;
-  icon.style.top = `${point.position[1] * 100}%`;
-  imageContainer.appendChild(icon);
-  // Run decode
-  decode();
-});
-// Clamp a value inside a range [min, max]
-function clamp(x, min = 0, max = 1) {
-  return Math.max(Math.min(x, max), min);
-}
-function getPoint(e) {
-  // Get bounding box
-  const bb = imageContainer.getBoundingClientRect();
-  // Get the mouse coordinates relative to the container
-  const mouseX = clamp((e.clientX - bb.left) / bb.width);
-  const mouseY = clamp((e.clientY - bb.top) / bb.height);
-  return {
-    position: [mouseX, mouseY],
-    label:
-      e.button === 2 // right click
-        ? 0 // negative prompt
-        : 1, // positive prompt
-  };
-}
-// Do not show context menu on right click
-imageContainer.addEventListener("contextmenu", (e) => e.preventDefault());
-// Attach hover event to image container
-imageContainer.addEventListener("mousemove", (e) => {
-  if (!imageEmbeddings || isMultiMaskMode) {
-    // Ignore mousemove events if the image is not encoded yet,
-    // or we are in multi-mask mode
-    return;
-  }
-  lastPoints = [getPoint(e)];
-  decode();
 });
-// Handle cut button click
 cutButton.addEventListener("click", async () => {
   const [w, h] = [maskCanvas.width, maskCanvas.height];
-  // Get the mask pixel data (and use this as a buffer)
   const maskImageData = maskContext.getImageData(0, 0, w, h);
-  // Create a new canvas to hold the cut-out
   const cutCanvas = new OffscreenCanvas(w, h);
   const cutContext = cutCanvas.getContext("2d");
-  // Copy the image pixel data to the cut canvas
   const maskPixelData = maskImageData.data;
   const imagePixelData = imageInput.data;
   for (let i = 0; i < w * h; ++i) {
-    const sourceOffset = 3 * i; // RGB
-    const targetOffset = 4 * i; // RGBA
     if (maskPixelData[targetOffset + 3] > 0) {
-      // Only copy opaque pixels
       for (let j = 0; j < 3; ++j) {
         maskPixelData[targetOffset + j] = imagePixelData[sourceOffset + j];
       }
@@ -278,7 +246,6 @@ cutButton.addEventListener("click", async () => {
   }
   cutContext.putImageData(maskImageData, 0, 0);
-  // Download image
   const link = document.createElement("a");
   link.download = "image.png";
   link.href = URL.createObjectURL(await cutCanvas.convertToBlob());
@@ -286,16 +253,17 @@ cutButton.addEventListener("click", async () => {
   link.remove();
 });
 const model_id = "Xenova/slimsam-77-uniform";
 statusLabel.textContent = "Loading model...";
-const model = await SamModel.from_pretrained(model_id, {
-  dtype: "fp16", // or "fp32"
   device: "webgpu",
 });
-const processor = await AutoProcessor.from_pretrained(model_id);
 statusLabel.textContent = "Ready";
-// Enable the user interface
 fileUpload.disabled = false;
 uploadButton.style.opacity = 1;
 example.style.pointerEvents = "auto";

 const resetButton = document.getElementById("reset-image");
 const clearButton = document.getElementById("clear-points");
 const cutButton = document.getElementById("cut-mask");
 const maskCanvas = document.getElementById("mask-output");
 const maskContext = maskCanvas.getContext("2d");
 // State variables (module-level; read and written by encode(), autoSegment() and the button handlers)
 let isEncoding = false; // true while encode() is running
 let imageInput = null; // image loaded by encode() via RawImage.fromURL
 let imageProcessed = null; // result of processor(imageInput)
 let imageEmbeddings = null; // result of model.get_image_embeddings(imageProcessed)
+let model = null; // assigned from SamModel.from_pretrained further down in the file
+let processor = null; // assigned from AutoProcessor.from_pretrained further down in the file
+async function encode(url) { // Loads the image at url, computes its embeddings, then runs autoSegment()
+  if (isEncoding) return; // ignore the call while a previous encode is still in progress
+  isEncoding = true;
+  statusLabel.textContent = "Extracting image embedding...";
+  imageInput = await RawImage.fromURL(url);
+  // Update UI: show the image as the container background, hide the upload button, disable cutting
+  imageContainer.style.backgroundImage = `url(${url})`;
+  uploadButton.style.display = "none";
+  cutButton.disabled = true;
+  // Recompute image embeddings
+  imageProcessed = await processor(imageInput);
+  imageEmbeddings = await model.get_image_embeddings(imageProcessed);
+  statusLabel.textContent = "Embedding extracted!";
+  isEncoding = false; // NOTE(review): skipped if any await above rejects, leaving isEncoding true until the reset handler clears it - consider try/finally
+  // Run automatic segmentation right away
+  await autoSegment();
+}
+async function autoSegment() { // Prompts the model with a grid of single points, filters the resulting masks by size and draws them
+  if (!imageEmbeddings) {
+    statusLabel.textContent = "No image embeddings available!";
     return;
   }
+  statusLabel.textContent = "Generating automatic segments...";
+  // Build grid-based prompt points (for automatic segmentation)
+  const gridSize = 50; // Grid spacing (in pixels of the original image)
   const reshaped = imageProcessed.reshaped_input_sizes[0];
+  let points = [];
+  for (let y = gridSize / 2; y < imageInput.height; y += gridSize) {
+    for (let x = gridSize / 2; x < imageInput.width; x += gridSize) {
+      points.push([
+        (x / imageInput.width) * reshaped[1], // x scaled from image width to reshaped[1]
+        (y / imageInput.height) * reshaped[0], // y scaled from image height to reshaped[0]
+      ]);
+    }
   }
+  // Arrays that collect one mask and one score list per grid point
+  let masks = [];
+  let scores = [];
+  // Run segmentation for each grid point (one awaited model call per point, sequentially)
+  for (let i = 0; i < points.length; i++) {
+    const point = points[i];
+    const input_points = new Tensor("float32", point, [1, 1, 1, 2]);
+    const input_labels = new Tensor("int64", [1n], [1, 1, 1]); // label 1 for every point
+    const { pred_masks, iou_scores } = await model({
+      ...imageEmbeddings,
+      input_points,
+      input_labels,
+    });
+    const processedMasks = await processor.post_process_masks(
+      pred_masks,
+      imageProcessed.original_sizes,
+      imageProcessed.reshaped_input_sizes,
+    );
+    masks.push(processedMasks[0][0]); // Take the first mask. NOTE(review): the removed decode() passed this value through RawImage.fromTensor before it was indexed; here it is stored as-is - confirm the data layout still matches the indexing in updateMaskOverlay
+    scores.push(iou_scores.data);
   }
+  // Filter masks (skip masks that are too small). NOTE(review): the original comment also mentioned low-scoring masks, but no score check is performed below
+  const filteredMasks = [];
+  const filteredScores = [];
+  for (let i = 0; i < masks.length; i++) {
+    const mask = masks[i];
+    let pixelCount = 0;
+    for (let j = 0; j < mask.data.length; j++) { // NOTE(review): counts every entry of mask.data; if it holds several candidate masks per point this count spans all of them - confirm
+      if (mask.data[j] === 1) pixelCount++;
     }
+    if (pixelCount > (imageInput.width * imageInput.height) / 100) {
+      // Masks larger than 1% of the image's pixel count
+      filteredMasks.push(mask);
+      filteredScores.push(scores[i]);
     }
   }
+  // Draw the masks and labels
+  updateMaskOverlay(filteredMasks, filteredScores);
+  statusLabel.textContent = `Found ${filteredMasks.length} objects`;
 }
+function updateMaskOverlay(masks, scores) { // Draws each mask in a random colour with an "Object N" label on maskCanvas; scores[m] is the score list for masks[m]
+  // Update canvas dimensions to match the input image (if different)
+  if (
+    maskCanvas.width !== imageInput.width ||
+    maskCanvas.height !== imageInput.height
+  ) {
+    maskCanvas.width = imageInput.width;
+    maskCanvas.height = imageInput.height;
+  }
+  // Clear the canvas first
+  maskContext.clearRect(0, 0, maskCanvas.width, maskCanvas.height);
+  // Draw each mask
+  for (let m = 0; m < masks.length; m++) {
+    const mask = masks[m];
+    const imageData = maskContext.createImageData(
+      maskCanvas.width,
+      maskCanvas.height,
+    );
+    const pixelData = imageData.data;
+    // Select the best mask: the index with the highest score in scores[m]
+    const numMasks = scores[m].length;
+    let bestIndex = 0;
+    for (let i = 1; i < numMasks; ++i) {
+      if (scores[m][i] > scores[m][bestIndex]) {
+        bestIndex = i;
+      }
+    }
+    // Colour the mask with a random RGB colour
+    const r = Math.random() * 255;
+    const g = Math.random() * 255;
+    const b = Math.random() * 255;
+    for (let i = 0; i < pixelData.length; ++i) { // NOTE(review): bound is pixelData.length while the write offset is 4 * i, so i runs past the pixel count - presumably width * height was intended
+      if (mask.data[numMasks * i + bestIndex] === 1) { // assumes mask.data interleaves numMasks values per pixel - TODO confirm
+        const offset = 4 * i;
+        pixelData[offset] = r;
+        pixelData[offset + 1] = g;
+        pixelData[offset + 2] = b;
+        pixelData[offset + 3] = 128; // Semi-transparent
+      }
+    }
+    maskContext.putImageData(imageData, 0, 0); // NOTE(review): putImageData writes the whole buffer, including its untouched transparent pixels, so each iteration appears to erase masks and labels drawn by earlier iterations - confirm
+    // Add the label at the mean x/y of the mask's pixels
+    let xIndices = [];
+    let yIndices = [];
+    for (let i = 0; i < mask.data.length; i++) {
+      if (mask.data[numMasks * i + bestIndex] === 1) {
+        const x = i % maskCanvas.width; // treats i as a row-major pixel index
+        const y = Math.floor(i / maskCanvas.width);
+        xIndices.push(x);
+        yIndices.push(y);
+      }
+    }
+    if (xIndices.length > 0 && yIndices.length > 0) {
+      const centerX = Math.floor(
+        xIndices.reduce((a, b) => a + b, 0) / xIndices.length,
+      );
+      const centerY = Math.floor(
+        yIndices.reduce((a, b) => a + b, 0) / yIndices.length,
+      );
+      maskContext.fillStyle = "white";
+      maskContext.font = "16px Arial";
+      maskContext.strokeStyle = "black";
+      maskContext.lineWidth = 2;
+      const label = `Object ${m + 1}`;
+      maskContext.strokeText(label, centerX, centerY); // black outline first, white fill on top
+      maskContext.fillText(label, centerX, centerY);
+    }
+  }
+  // Enable the cut button
+  cutButton.disabled = false;
 }
+// Keep the existing event listeners, but remove the click events
 fileUpload.addEventListener("change", function (e) {
   const file = e.target.files[0];
   if (!file) return; // no file was selected
   const reader = new FileReader();
   reader.onload = (e2) => encode(e2.target.result); // pass the loaded result to encode()
   reader.readAsDataURL(file);
 });
   encode(EXAMPLE_URL);
 });
+resetButton.addEventListener("click", () => { // Reset handler: drops the current image state and restores the initial UI
+  imageInput = null;
+  imageProcessed = null;
+  imageEmbeddings = null;
+  isEncoding = false;
+  maskContext.clearRect(0, 0, maskCanvas.width, maskCanvas.height); // wipe any drawn masks and labels
+  cutButton.disabled = true;
+  imageContainer.style.backgroundImage = "none";
+  uploadButton.style.display = "flex"; // show the upload button again (encode() hides it)
+  statusLabel.textContent = "Ready";
 });
 cutButton.addEventListener("click", async () => {
   const [w, h] = [maskCanvas.width, maskCanvas.height];
   const maskImageData = maskContext.getImageData(0, 0, w, h);
   const cutCanvas = new OffscreenCanvas(w, h);
   const cutContext = cutCanvas.getContext("2d");
   const maskPixelData = maskImageData.data;
   const imagePixelData = imageInput.data;
   for (let i = 0; i < w * h; ++i) {
+    const sourceOffset = 3 * i;
+    const targetOffset = 4 * i;
     if (maskPixelData[targetOffset + 3] > 0) {
       for (let j = 0; j < 3; ++j) {
         maskPixelData[targetOffset + j] = imagePixelData[sourceOffset + j];
       }
   }
   cutContext.putImageData(maskImageData, 0, 0);
   const link = document.createElement("a");
   link.download = "image.png";
   link.href = URL.createObjectURL(await cutCanvas.convertToBlob());
   link.remove();
 });
+// Load the model
 const model_id = "Xenova/slimsam-77-uniform";
 statusLabel.textContent = "Loading model...";
+model = await SamModel.from_pretrained(model_id, {
+  dtype: "fp16",
   device: "webgpu",
 });
+processor = await AutoProcessor.from_pretrained(model_id);
 statusLabel.textContent = "Ready";
+// Enable the UI
 fileUpload.disabled = false;
 uploadButton.style.opacity = 1;
 example.style.pointerEvents = "auto";