Commit 1c76709 · added gif
Parent: 1398519

Files changed:
- .gitignore (+1 -0)
- README.md (+4 -0)
- assets/patient-17-4C-frame-11.png (+0 -0)
- assets/patient-2-4C-frame-9.png (+0 -0)
- assets/patient-50-4C-frame-53.png (+0 -0)
- configs/semantic_dps_opt.yaml (+32 -0)
- main.py (+18 -132)
- plots.py (+62 -0)
- sweeper.py (+0 -1)
- utils.py (+133 -0)
.gitignore CHANGED
@@ -2,6 +2,7 @@
 .env
 temp/
 *.png
+!assets/*.png
 *.pdf
 *.hash
 *.npz
README.md CHANGED
@@ -13,6 +13,9 @@
 <p>Eindhoven University of Technology, the Netherlands</p>
 </div>
 
+<p align="center">
+<img src="animation.gif" alt="Cardiac Ultrasound Dehazing Animation" style="max-width: 100%; height: auto;">
+</p>
 
 ### Installation
 
@@ -21,6 +24,7 @@ The algorithm is implemented using Keras with JAX backend. Furthermore it heavil
 Either install the following in your Python environment, or use the [Dockerfile](./Dockerfile) provided in this repository.
 
 ```bash
+# requires Python>=3.10
 pip install tyro optuna zea==0.0.4
 pip install -U "jax[cuda12]"
 ```
assets/patient-17-4C-frame-11.png DELETED
Binary file (27.4 kB)

assets/patient-2-4C-frame-9.png ADDED
Binary file

assets/patient-50-4C-frame-53.png DELETED
Binary file (22.1 kB)
configs/semantic_dps_opt.yaml ADDED
@@ -0,0 +1,32 @@
+# While these params optimize for the final score of the challenge,
+# we generally observe lower perceptual quality of the dehazed results
+# and in some cases even artifacts.
+# We therefore recommend using configs/semantic_dps.yaml instead.
+diffusion_model_path: "hf://tristan-deep/semantic-diffusion-echo-dehazing"
+segmentation_model_path: "hf://tristan-deep/semantic-segmentation-echo-dehazing"
+seed: 42
+
+params:
+  diffusion_steps: 480
+  initial_diffusion_step: 0
+  batch_size: 16
+  threshold_output_quantile: 0.17447
+  preserve_bottom_percent: 32.0
+  bottom_transition_width: 7.0
+
+  mask_params:
+    sigma: 0.4704516
+    threshold: 0.18935
+  fixed_mask_params:
+    top_px: 20
+    bottom_px: 40
+  skeleton_params:
+    sigma_pre: 9.919
+    sigma_post: 9.5347840479
+    threshold: 0.73917
+  guidance_kwargs:
+    omega: 0.78
+    omega_vent: 0.0001
+    omega_sept: 15.84
+    eta: 0.01105
+    smooth_l1_beta: 6.3726
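For illustration, a minimal sketch of reading this config with plain PyYAML. The repository itself loads configs via `zea.Config` in main.py, and the nesting of `mask_params`/`guidance_kwargs` under `params` is inferred from the rendering, so treat both as assumptions:

```python
import yaml  # pip install pyyaml

# Hypothetical standalone read of the optimized config; the real
# entry point is main.py, which goes through zea.Config instead.
with open("configs/semantic_dps_opt.yaml") as f:
    cfg = yaml.safe_load(f)

print(cfg["diffusion_model_path"])       # hf://tristan-deep/semantic-diffusion-echo-dehazing
print(cfg["params"]["diffusion_steps"])  # 480
print(cfg["params"]["guidance_kwargs"])  # {'omega': 0.78, 'omega_vent': 0.0001, ...}
```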
main.py CHANGED
@@ -10,7 +10,7 @@ import tyro
 import zea
 from keras import ops
 from PIL import Image
-from skimage import filters, morphology
+from skimage import filters
 from zea import Config, init_device, log
 from zea.internal.operators import Operator
 from zea.models.diffusion import (
@@ -21,136 +21,14 @@ from zea.models.diffusion import (
 from zea.tensor_ops import L2
 from zea.utils import translate
 
-from plots import plot_batch_with_named_masks, plot_dehazed_results
-
-
-def L1(x):
-    """L1 norm of a tensor.
-
-    Implementation of L1 norm: https://mathworld.wolfram.com/L1-Norm.html
-    """
-    return ops.sum(ops.abs(x))
-
-
-def smooth_L1(x, beta=0.4):
-    """Smooth L1 loss function.
-
-    Implementation of Smooth L1 loss. Small beta values make it similar to L1 loss,
-    while large beta values make it similar to a scaled L2 loss.
-    """
-    abs_x = ops.abs(x)
-    loss = ops.where(abs_x < beta, 0.5 * x**2 / beta, abs_x - 0.5 * beta)
-    return ops.sum(loss)
-
-
-def postprocess(data, normalization_range):
-    """Postprocess data from model output to image."""
-    data = ops.clip(data, *normalization_range)
-    data = translate(data, normalization_range, (0, 255))
-    data = ops.convert_to_numpy(data)
-    data = np.squeeze(data, axis=-1)
-    return np.clip(data, 0, 255).astype("uint8")
-
-
-def preprocess(data, normalization_range):
-    """Preprocess data for model input. Converts uint8 image(s) in [0, 255] to model input range."""
-    data = ops.convert_to_tensor(data, dtype="float32")
-    data = translate(data, (0, 255), normalization_range)
-    data = ops.expand_dims(data, axis=-1)
-    return data
-
-
-def apply_bottom_preservation(
-    output_images, input_images, preserve_bottom_percent=30.0, transition_width=10.0
-):
-    """Apply bottom preservation with smooth windowed transition.
-
-    Args:
-        output_images: Model output images, (batch, height, width, channels)
-        input_images: Original input images, (batch, height, width, channels)
-        preserve_bottom_percent: Percentage of bottom to preserve from input (default 30%)
-        transition_width: Percentage of image height for smooth transition (default 10%)
-
-    Returns:
-        Blended images with preserved bottom portion
-    """
-    output_shape = ops.shape(output_images)
-
-    batch_size, height, width, channels = output_shape
-
-    preserve_height = int(height * preserve_bottom_percent / 100.0)
-    transition_height = int(height * transition_width / 100.0)
-
-    transition_start = height - preserve_height - transition_height
-    preserve_start = height - preserve_height
-
-    transition_start = max(0, transition_start)
-    preserve_start = min(height, preserve_start)
-
-    if transition_start >= preserve_start:
-        transition_start = preserve_start
-        transition_height = 0
-
-    y_coords = ops.arange(height, dtype="float32")
-    y_coords = ops.reshape(y_coords, (height, 1, 1))
-
-    if transition_height > 0:
-        # Smooth transition using cosine interpolation
-        transition_region = ops.logical_and(
-            y_coords >= transition_start, y_coords < preserve_start
-        )
-
-        transition_progress = (y_coords - transition_start) / transition_height
-        transition_progress = ops.clip(transition_progress, 0.0, 1.0)
-
-        # Use cosine for smooth transition (0.5 * (1 - cos(π * t)))
-        cosine_weight = 0.5 * (1.0 - ops.cos(np.pi * transition_progress))
-
-        blend_weight = ops.where(
-            y_coords < transition_start,
-            0.0,
-            ops.where(
-                transition_region,
-                cosine_weight,
-                1.0,
-            ),
-        )
-    else:
-        # No transition, just hard switch
-        blend_weight = ops.where(y_coords >= preserve_start, 1.0, 0.0)
-
-    blend_weight = ops.expand_dims(blend_weight, axis=0)
-
-    blended_images = (1.0 - blend_weight) * output_images + blend_weight * input_images
-
-    return blended_images
-
-
-def extract_skeleton(images, input_range, sigma_pre=4, sigma_post=4, threshold=0.3):
-    """Extract skeletons from the input images."""
-    images_np = ops.convert_to_numpy(images)
-    images_np = np.clip(images_np, input_range[0], input_range[1])
-    images_np = translate(images_np, input_range, (0, 1))
-    images_np = np.squeeze(images_np, axis=-1)
-
-    skeleton_masks = []
-    for img in images_np:
-        img[img < threshold] = 0
-        smoothed = filters.gaussian(img, sigma=sigma_pre)
-        binary = smoothed > filters.threshold_otsu(smoothed)
-        skeleton = morphology.skeletonize(binary)
-        skeleton = morphology.dilation(skeleton, morphology.disk(2))
-        skeleton = filters.gaussian(skeleton.astype(np.float32), sigma=sigma_post)
-        skeleton_masks.append(skeleton)
-
-    skeleton_masks = np.array(skeleton_masks)
-    skeleton_masks = np.expand_dims(skeleton_masks, axis=-1)
-
-    # normalize to [0, 1]
-    min_val, max_val = np.min(skeleton_masks), np.max(skeleton_masks)
-    skeleton_masks = (skeleton_masks - min_val) / (max_val - min_val + 1e-8)
-
-    return ops.convert_to_tensor(skeleton_masks, dtype=images.dtype)
+from plots import create_animation, plot_batch_with_named_masks, plot_dehazed_results
+from utils import (
+    apply_bottom_preservation,
+    extract_skeleton,
+    postprocess,
+    preprocess,
+    smooth_L1,
+)
 
 
 class IdentityOperator(Operator):
@@ -250,7 +128,6 @@ class SemanticDPS(DPS):
             masks_sept + masks_fixed + masks_skeleton + masks_dark, 0, 1
         )
 
-        # background = not masks_strong, not vent
         background = ops.where(masks_strong < 0.1, 1.0, 0.0) * ops.where(
            masks_vent == 0, 1.0, 0.0
        )
@@ -534,6 +411,7 @@ def main(
     masks_viz = copy.deepcopy(masks)
     masks_viz.pop("haze")
 
+    num_img = 2  # hardcoded as the plotting figure only neatly supports 2 rows
     masks_viz = {k: v[:num_img] for k, v in masks_viz.items()}
 
     fig = plot_batch_with_named_masks(
@@ -553,6 +431,14 @@ def main(
     fig.savefig(path.with_suffix(".pdf"), **save_kwargs)
     log.success(f"Segmentation steps saved to {log.yellow(path)}")
 
+    last_batch_size = len(diffusion_model.track_progress[0])
+    create_animation(
+        preprocess(hazy_images[-last_batch_size:], diffusion_model.input_range),
+        diffusion_model,
+        output_path="animation.gif",
+        fps=10,
+    )
+
     plt.close("all")
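The `smooth_L1` helper moved to utils.py is a Huber-style loss: quadratic below `beta`, linear above it. A quick NumPy check of the limiting behavior (a standalone sketch, not the repository's code path):

```python
import numpy as np

def smooth_l1(x, beta):
    # Same piecewise rule as utils.smooth_L1, element-wise and without the sum
    abs_x = np.abs(x)
    return np.where(abs_x < beta, 0.5 * x**2 / beta, abs_x - 0.5 * beta)

x = np.array([0.05, 2.0])
print(smooth_l1(x, beta=1e-3))  # ~[0.05, 2.0]: approaches L1 for small beta
print(smooth_l1(x, beta=10.0))  # ~[0.000125, 0.2]: quadratic (L2-shaped) for large beta
```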
plots.py CHANGED
@@ -2,6 +2,7 @@ import json
 from pathlib import Path
 from typing import Any, Dict, List
 
+import keras
 import matplotlib.pyplot as plt
 import numpy as np
 import tyro
@@ -9,8 +10,13 @@ from keras import ops
 from matplotlib.patches import PathPatch
 from matplotlib.path import Path as pltPath
 from skimage import measure
+from zea import log
+from zea.io_lib import matplotlib_figure_to_numpy
+from zea.utils import save_to_gif
 from zea.visualize import plot_image_grid
 
+from utils import postprocess
+
 
 def add_shape_from_mask(ax, mask, **kwargs):
     """add a shape to axis from mask array.
@@ -335,6 +341,62 @@ def plot_optimization_history_from_json(
     plt.close(fig)
 
 
+def create_animation_frame(hazy_images, tissue_frame, haze_frame):
+    """Create a single animation frame from the tracked progress."""
+    batch, height, width = ops.shape(hazy_images)
+    frame_stack = ops.stack(
+        [
+            hazy_images,
+            tissue_frame,
+            haze_frame,
+        ]
+    )
+    frame_stack = ops.reshape(frame_stack, (-1, height, width))
+    fig_frame, _ = plot_image_grid(
+        frame_stack,
+        ncols=len(hazy_images),
+        remove_axis=False,
+        vmin=0,
+        vmax=255,
+    )
+    labels = ["Hazy", "Tissue"] if haze_frame is None else ["Hazy", "Tissue", "Haze"]
+    for i, ax in enumerate(fig_frame.axes):
+        label = labels[i % len(labels)]
+        ax.set_ylabel(label, fontsize=12)
+    frame_array = matplotlib_figure_to_numpy(fig_frame)
+    plt.close(fig_frame)
+    return frame_array
+
+
+def create_animation(hazy_images, diffusion_model, output_path, fps):
+    """Create animation from tracked progress frames."""
+    if not (len(diffusion_model.track_progress) > 1):
+        log.warning(
+            "Animation requested but no intermediate frames were tracked. "
+            "Try reducing diffusion_steps or ensure progress tracking is enabled."
+        )
+        return
+
+    log.info(f"Creating animation with {len(diffusion_model.track_progress)} frames...")
+
+    animation_frames = []
+    progbar = keras.utils.Progbar(
+        len(diffusion_model.track_progress), unit_name="frame"
+    )
+    for tissue_frame in diffusion_model.track_progress:
+        haze_frame = hazy_images - tissue_frame - 1
+        tissue_frame = postprocess(tissue_frame, diffusion_model.input_range)
+        haze_frame = postprocess(haze_frame, diffusion_model.input_range)
+        _hazy_images = postprocess(hazy_images, diffusion_model.input_range)
+        frame_array = create_animation_frame(_hazy_images, tissue_frame, haze_frame)
+        animation_frames.append(frame_array)
+        progbar.add(1)
+
+    Path(output_path).parent.mkdir(parents=True, exist_ok=True)
+    animation_path = Path(output_path).with_suffix(".gif")
+    save_to_gif(animation_frames, animation_path, fps=fps)
+
+
 def main(json_file: str, output_dir: str = "plots", method: str = "semantic_dps"):
     json_path = Path(json_file)
     if not json_path.exists():
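The line `haze_frame = hazy_images - tissue_frame - 1` in `create_animation` is the haze residual expressed in the model's normalized range. Assuming `input_range` is `(-1, 1)` (not confirmed by this diff), the identity checks out:

```python
import numpy as np

hazy, tissue = 0.2, -0.5             # example pixel values in [-1, 1]
to_unit = lambda v: (v + 1.0) / 2.0  # map [-1, 1] -> [0, 1]

haze_unit = to_unit(hazy) - to_unit(tissue)  # haze as a difference in [0, 1] terms
haze_model = 2.0 * haze_unit - 1.0           # back to the model's [-1, 1] range
assert np.isclose(haze_model, hazy - tissue - 1.0)
```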
sweeper.py CHANGED
@@ -136,7 +136,6 @@ class OptunaObjective:
             "omega": trial.suggest_float("omega", 0.5, 50.0, log=True),
             "omega_vent": trial.suggest_float("omega_vent", 0.0001, 50.0, log=True),
             "omega_sept": trial.suggest_float("omega_sept", 0.1, 50.0, log=True),
-            "omega_dark": trial.suggest_float("omega_dark", 0.001, 50.0, log=True),
             "eta": trial.suggest_float("eta", 0.001, 1.0, log=True),
             "smooth_l1_beta": trial.suggest_float(
                 "smooth_l1_beta", 0.1, 10.0, log=True
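For context on the `suggest_float` calls above: with `log=True`, Optuna samples uniformly in log-space, which suits scale parameters like `omega` that span several orders of magnitude. A toy, self-contained sketch; the objective here is a stand-in, not the repository's scoring:

```python
import optuna

def objective(trial):
    # log=True draws uniformly in log-space over [low, high]
    omega = trial.suggest_float("omega", 0.5, 50.0, log=True)
    eta = trial.suggest_float("eta", 0.001, 1.0, log=True)
    return (omega - 5.0) ** 2 + eta  # toy objective

study = optuna.create_study(direction="minimize")
study.optimize(objective, n_trials=20)
print(study.best_params)
```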
utils.py ADDED
@@ -0,0 +1,133 @@
+import numpy as np
+from keras import ops
+from skimage import filters, morphology
+from zea.utils import translate
+
+
+def L1(x):
+    """L1 norm of a tensor.
+
+    Implementation of L1 norm: https://mathworld.wolfram.com/L1-Norm.html
+    """
+    return ops.sum(ops.abs(x))
+
+
+def smooth_L1(x, beta=0.4):
+    """Smooth L1 loss function.
+
+    Implementation of Smooth L1 loss. Small beta values make it similar to L1 loss,
+    while large beta values make it similar to a scaled L2 loss.
+    """
+    abs_x = ops.abs(x)
+    loss = ops.where(abs_x < beta, 0.5 * x**2 / beta, abs_x - 0.5 * beta)
+    return ops.sum(loss)
+
+
+def postprocess(data, normalization_range):
+    """Postprocess data from model output to image."""
+    data = ops.clip(data, *normalization_range)
+    data = translate(data, normalization_range, (0, 255))
+    data = ops.convert_to_numpy(data)
+    data = np.squeeze(data, axis=-1)
+    return np.clip(data, 0, 255).astype("uint8")
+
+
+def preprocess(data, normalization_range):
+    """Preprocess data for model input. Converts uint8 image(s) in [0, 255] to model input range."""
+    data = ops.convert_to_tensor(data, dtype="float32")
+    data = translate(data, (0, 255), normalization_range)
+    data = ops.expand_dims(data, axis=-1)
+    return data
+
+
+def apply_bottom_preservation(
+    output_images, input_images, preserve_bottom_percent=30.0, transition_width=10.0
+):
+    """Apply bottom preservation with smooth windowed transition.
+
+    Args:
+        output_images: Model output images, (batch, height, width, channels)
+        input_images: Original input images, (batch, height, width, channels)
+        preserve_bottom_percent: Percentage of bottom to preserve from input (default 30%)
+        transition_width: Percentage of image height for smooth transition (default 10%)
+
+    Returns:
+        Blended images with preserved bottom portion
+    """
+    output_shape = ops.shape(output_images)
+
+    batch_size, height, width, channels = output_shape
+
+    preserve_height = int(height * preserve_bottom_percent / 100.0)
+    transition_height = int(height * transition_width / 100.0)
+
+    transition_start = height - preserve_height - transition_height
+    preserve_start = height - preserve_height
+
+    transition_start = max(0, transition_start)
+    preserve_start = min(height, preserve_start)
+
+    if transition_start >= preserve_start:
+        transition_start = preserve_start
+        transition_height = 0
+
+    y_coords = ops.arange(height, dtype="float32")
+    y_coords = ops.reshape(y_coords, (height, 1, 1))
+
+    if transition_height > 0:
+        # Smooth transition using cosine interpolation
+        transition_region = ops.logical_and(
+            y_coords >= transition_start, y_coords < preserve_start
+        )
+
+        transition_progress = (y_coords - transition_start) / transition_height
+        transition_progress = ops.clip(transition_progress, 0.0, 1.0)
+
+        # Use cosine for smooth transition (0.5 * (1 - cos(π * t)))
+        cosine_weight = 0.5 * (1.0 - ops.cos(np.pi * transition_progress))
+
+        blend_weight = ops.where(
+            y_coords < transition_start,
+            0.0,
+            ops.where(
+                transition_region,
+                cosine_weight,
+                1.0,
+            ),
+        )
+    else:
+        # No transition, just hard switch
+        blend_weight = ops.where(y_coords >= preserve_start, 1.0, 0.0)
+
+    blend_weight = ops.expand_dims(blend_weight, axis=0)
+
+    blended_images = (1.0 - blend_weight) * output_images + blend_weight * input_images
+
+    return blended_images
+
+
+def extract_skeleton(images, input_range, sigma_pre=4, sigma_post=4, threshold=0.3):
+    """Extract skeletons from the input images."""
+    images_np = ops.convert_to_numpy(images)
+    images_np = np.clip(images_np, input_range[0], input_range[1])
+    images_np = translate(images_np, input_range, (0, 1))
+    images_np = np.squeeze(images_np, axis=-1)
+
+    skeleton_masks = []
+    for img in images_np:
+        img[img < threshold] = 0
+        smoothed = filters.gaussian(img, sigma=sigma_pre)
+        binary = smoothed > filters.threshold_otsu(smoothed)
+        skeleton = morphology.skeletonize(binary)
+        skeleton = morphology.dilation(skeleton, morphology.disk(2))
+        skeleton = filters.gaussian(skeleton.astype(np.float32), sigma=sigma_post)
+        skeleton_masks.append(skeleton)
+
+    skeleton_masks = np.array(skeleton_masks)
+    skeleton_masks = np.expand_dims(skeleton_masks, axis=-1)
+
+    # normalize to [0, 1]
+    min_val, max_val = np.min(skeleton_masks), np.max(skeleton_masks)
+    skeleton_masks = (skeleton_masks - min_val) / (max_val - min_val + 1e-8)
+
+    return ops.convert_to_tensor(skeleton_masks, dtype=images.dtype)
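The cosine transition in `apply_bottom_preservation` ramps the blend weight smoothly from 0 (pure model output) to 1 (pure original input) across the transition band. A quick check of the weight curve w(t) = 0.5 * (1 - cos(π * t)):

```python
import numpy as np

# Blend weight used for the smooth bottom transition
t = np.linspace(0.0, 1.0, 5)
w = 0.5 * (1.0 - np.cos(np.pi * t))
print(w)  # [0.     0.1464 0.5    0.8536 1.    ] -- flat at both ends, steepest mid-band
```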