import gradio as gr
import numpy as np
import torch
import cv2
from PIL import Image, ImageFilter
from transformers import (
    BeitFeatureExtractor,
    BeitForSemanticSegmentation,
    DPTFeatureExtractor,
    DPTForDepthEstimation,
)

# Load the segmentation model and feature extractor
segmentation_processor = BeitFeatureExtractor.from_pretrained("microsoft/beit-base-finetuned-ade-640-640")
segmentation_model = BeitForSemanticSegmentation.from_pretrained("microsoft/beit-base-finetuned-ade-640-640")

# Load the depth estimation model and feature extractor
depth_feature_extractor = DPTFeatureExtractor.from_pretrained("Intel/dpt-large")
depth_model = DPTForDepthEstimation.from_pretrained("Intel/dpt-large")


def apply_gaussian_blur(image):
    # Resize and preprocess the image
    image = image.resize((512, 512)).convert("RGB")
    inputs = segmentation_processor(image, return_tensors="pt")

    # Perform segmentation to get the 'person' mask
    with torch.no_grad():
        outputs = segmentation_model(**inputs)
    logits = outputs.logits

    # Upsample the logits to the working resolution before taking the argmax;
    # the raw logits are at the model's output resolution and would not
    # broadcast against the 512x512 image otherwise
    upsampled_logits = torch.nn.functional.interpolate(
        logits, size=(512, 512), mode="bilinear", align_corners=False
    )
    segmentation = torch.argmax(upsampled_logits, dim=1)[0]

    # Create a binary mask for the 'person' class (ADE20K index 12)
    person_index = 12
    binary_mask = (segmentation == person_index).numpy().astype(np.uint8) * 255

    # Apply Gaussian blur to the entire image
    image_np = np.array(image)
    blurred_image = cv2.GaussianBlur(image_np, (0, 0), sigmaX=15, sigmaY=15)

    # Normalize the mask to [0, 1] and add a channel axis for broadcasting
    normalized_mask = binary_mask / 255.0
    normalized_mask = np.expand_dims(normalized_mask, axis=-1)

    # Composite: keep the person sharp, blur the background
    final_image = (image_np * normalized_mask + blurred_image * (1 - normalized_mask)).astype(np.uint8)
    return Image.fromarray(final_image)


def apply_lens_blur(image):
    # Resize and preprocess the image
    image = image.resize((512, 512)).convert("RGB")
    depth_inputs = depth_feature_extractor(images=image, return_tensors="pt")

    # Perform depth estimation
    with torch.no_grad():
        depth_outputs = depth_model(**depth_inputs)
    predicted_depth = depth_outputs.predicted_depth[0].cpu().numpy()

    # Normalize and invert the depth map so that more distant regions
    # receive the strongest blur
    min_depth = predicted_depth.min()
    max_depth = predicted_depth.max()
    normalized_depth = (predicted_depth - min_depth) / (max_depth - min_depth)
    inverted_depth = 1 - normalized_depth

    # Resize the depth map to match the working image size
    depth_weight_resized = Image.fromarray((inverted_depth * 255).astype(np.uint8)).resize((512, 512))
    depth_weight_resized = np.array(depth_weight_resized) / 255.0
    depth_weight_resized = depth_weight_resized[:, :, np.newaxis]

    # Apply maximum Gaussian blur to the original image
    blurred_image = image.filter(ImageFilter.GaussianBlur(radius=15))

    # Convert images to numpy arrays
    original_np = np.array(image).astype(np.float32)
    blurred_np = np.array(blurred_image).astype(np.float32)

    # Blend the sharp and blurred images based on the resized depth map
    output_np = (1 - depth_weight_resized) * original_np + depth_weight_resized * blurred_np

    # Convert back to uint8
    output_np = np.clip(output_np, 0, 255).astype(np.uint8)
    return Image.fromarray(output_np)


def apply_blur(image, blur_type):
    # gr.Interface expects a single callable, so dispatch on the selected effect
    if blur_type == "Gaussian Blur":
        return apply_gaussian_blur(image)
    return apply_lens_blur(image)


# Define the Gradio interface (gr.inputs/gr.outputs are deprecated; use the
# top-level components and a Radio selector for the blur type)
interface = gr.Interface(
    fn=apply_blur,
    inputs=[
        gr.Image(type="pil"),
        gr.Radio(["Gaussian Blur", "Lens Blur"], value="Gaussian Blur", label="Blur effect"),
    ],
    outputs=gr.Image(type="pil"),
    title="Blur Effects with Hugging Face",
    description="Apply Gaussian Blur or Lens Blur to images using semantic segmentation or depth estimation.",
)

# Launch the Gradio interface
interface.launch()
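
# Optional sanity check (sketch; assumes a local file named "sample.jpg", which
# is not part of the app). Uncomment and run in place of interface.launch() to
# apply both effects to one image and confirm the mask and depth map line up
# with the resized 512x512 input before serving the UI.
#
# img = Image.open("sample.jpg")
# apply_gaussian_blur(img).save("sample_gaussian_blur.jpg")
# apply_lens_blur(img).save("sample_lens_blur.jpg")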