|
|
import base64 |
|
|
from io import BytesIO |
|
|
from typing import Tuple, Optional |
|
|
|
|
|
import cv2 |
|
|
import gradio as gr |
|
|
import numpy as np |
|
|
from PIL import Image |
|
|
|
|
|
# MediaPipe is an optional dependency. HAS_MEDIAPIPE records whether the import
# succeeded so the rest of the module can skip face detection when it did not.
# Any exception raised while importing (not only ImportError) disables it.
try:
    import mediapipe as mp
except Exception:
    HAS_MEDIAPIPE = False
else:
    HAS_MEDIAPIPE = True
|
|
|
|
|
|
|
|
def _ensure_rgb_uint8(image: np.ndarray) -> np.ndarray:
    """Normalize an input image to an (H, W, 3) RGB array of dtype uint8.

    Gradio normally supplies RGB uint8 numpy arrays, but this function
    defensively accepts other inputs as well.

    Args:
        image: A numpy array (grayscale, single-channel, RGB or RGBA, any
            numeric or boolean dtype), a PIL image, or any array-like.

    Returns:
        An RGB uint8 array. An input that is already RGB uint8 is returned
        unchanged (no copy is made).

    Raises:
        ValueError: If ``image`` is None or its shape is not (H, W),
            (H, W, 1), (H, W, 3) or (H, W, 4).
    """
    if image is None:
        raise ValueError("No image provided")

    if not isinstance(image, np.ndarray):
        if isinstance(image, Image.Image):
            image = np.array(image.convert("RGB"))
        else:
            image = np.asarray(image)

    if image.dtype == np.bool_:
        # Boolean masks map to black/white rather than 0/1.
        image = image.astype(np.uint8) * 255
    elif np.issubdtype(image.dtype, np.floating):
        data = np.nan_to_num(image.astype(np.float64))
        # Heuristic: a float image whose maximum is <= 1.0 is treated as
        # normalized to [0, 1]; a plain cast would collapse it to all zeros.
        if data.size and data.max() <= 1.0:
            data = data * 255.0
        image = np.clip(np.rint(data), 0, 255).astype(np.uint8)
    elif image.dtype != np.uint8:
        # Saturate instead of letting out-of-range integers wrap around.
        image = np.clip(image, 0, 255).astype(np.uint8)

    if image.ndim == 2:
        # Grayscale -> RGB by replicating the single channel.
        image = np.repeat(image[:, :, np.newaxis], 3, axis=2)
    elif image.ndim == 3 and image.shape[2] == 1:
        image = np.repeat(image, 3, axis=2)
    elif image.ndim == 3 and image.shape[2] == 4:
        # RGBA -> RGB by dropping alpha; keep the result contiguous for OpenCV.
        image = np.ascontiguousarray(image[:, :, :3])
    elif image.ndim != 3 or image.shape[2] != 3:
        raise ValueError(f"Unsupported image shape: {image.shape}")
    return image
|
|
|
|
|
|
|
|
def _central_crop_bbox(width: int, height: int, frac: float = 0.6) -> Tuple[int, int, int, int]:
    """Return a centered crop box (x1, y1, x2, y2) covering `frac` of each dimension.

    Args:
        width: Image width in pixels (expected to be non-negative).
        height: Image height in pixels (expected to be non-negative).
        frac: Fraction of the width and height to keep; clamped to [0.2, 1.0].

    Returns:
        Pixel coordinates (x1, y1, x2, y2) with x2/y2 exclusive, suitable for
        slicing as ``image[y1:y2, x1:x2]``.
    """
    frac = float(np.clip(frac, 0.2, 1.0))
    # Truncation would yield a zero-area box for very small dimensions, so keep
    # at least one pixel per axis whenever that axis is non-empty.
    crop_w = min(width, max(1, int(width * frac)))
    crop_h = min(height, max(1, int(height * frac)))
    x1 = (width - crop_w) // 2
    y1 = (height - crop_h) // 2
    return x1, y1, x1 + crop_w, y1 + crop_h
|
|
|
|
|
|
|
|
def _detect_face_bbox_mediapipe(image_rgb: np.ndarray) -> Optional[Tuple[int, int, int, int]]:
    """Locate the largest face with MediaPipe Face Detection.

    The detected box is converted from relative to pixel coordinates, clipped
    to the image, and then grown by 8% of the image width and 12% of the image
    height on each side.

    Returns:
        (x1, y1, x2, y2) in pixels, or None when MediaPipe is unavailable, no
        face is found, the padded box is smaller than 10 px on either axis, or
        any exception is raised during detection.
    """
    if not HAS_MEDIAPIPE:
        return None

    img_h, img_w = image_rgb.shape[:2]

    def clamp(value, upper):
        # Clip to [0, upper] and truncate to an int pixel coordinate.
        return int(np.clip(value, 0, upper))

    try:
        with mp.solutions.face_detection.FaceDetection(
            model_selection=1, min_detection_confidence=0.5
        ) as detector:
            outcome = detector.process(image_rgb)

        faces = outcome.detections or []
        if not faces:
            return None

        # Pick the detection with the largest (non-negative) relative area.
        largest = max(
            faces,
            key=lambda det: (
                max(0.0, det.location_data.relative_bounding_box.width)
                * max(0.0, det.location_data.relative_bounding_box.height)
            ),
        )
        rel = largest.location_data.relative_bounding_box

        left = clamp(rel.xmin * img_w, img_w - 1)
        top = clamp(rel.ymin * img_h, img_h - 1)
        right = clamp((rel.xmin + rel.width) * img_w, img_w)
        bottom = clamp((rel.ymin + rel.height) * img_h, img_h)

        # Padding is proportional to the image size, not to the face box.
        margin_x = int(0.08 * img_w)
        margin_y = int(0.12 * img_h)
        left = clamp(left - margin_x, img_w - 1)
        top = clamp(top - margin_y, img_h - 1)
        right = clamp(right + margin_x, img_w)
        bottom = clamp(bottom + margin_y, img_h)

        too_small = (right - left) < 10 or (bottom - top) < 10
        return None if too_small else (left, top, right, bottom)
    except Exception:
        # Best effort: any detector failure means "no face found".
        return None
|
|
|
|
|
|
|
|
def _binary_open_close(mask: np.ndarray, kernel_size: int = 5, iterations: int = 1) -> np.ndarray:
    """Clean a binary mask with a morphological opening followed by a closing.

    Both operations use the same elliptical structuring element of size
    ``kernel_size`` x ``kernel_size`` and the same ``iterations`` count.
    """
    element = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (kernel_size, kernel_size))
    result = mask
    # Order matters: open first, then close.
    for operation in (cv2.MORPH_OPEN, cv2.MORPH_CLOSE):
        result = cv2.morphologyEx(result, operation, element, iterations=iterations)
    return result
|
|
|
|
|
|
|
|
def _skin_mask_ycrcb(image_rgb: np.ndarray) -> np.ndarray:
    """Build a skin mask by thresholding the Cr and Cb channels of YCrCb.

    A pixel is kept when 133 <= Cr <= 180 and 77 <= Cb <= 140; the luma
    channel is ignored. The raw mask is cleaned with an open/close pass,
    smoothed with a 5x5 Gaussian blur and re-binarized with Otsu's threshold.

    Returns:
        A uint8 mask where 255 marks skin-like pixels and 0 everything else.
    """
    cr_bounds = (133, 180)
    cb_bounds = (77, 140)

    ycrcb = cv2.cvtColor(image_rgb, cv2.COLOR_RGB2YCrCb)
    _luma, cr, cb = cv2.split(ycrcb)

    in_cr = cv2.inRange(cr, cr_bounds[0], cr_bounds[1])
    in_cb = cv2.inRange(cb, cb_bounds[0], cb_bounds[1])
    raw = cv2.bitwise_and(in_cr, in_cb)

    cleaned = _binary_open_close(raw, kernel_size=5, iterations=1)
    softened = cv2.GaussianBlur(cleaned, (5, 5), 0)
    # With THRESH_OTSU the explicit threshold value (0) is ignored.
    _, binary = cv2.threshold(softened, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    return binary
|
|
|
|
|
|
|
|
def _skin_mask_hsv(image_rgb: np.ndarray) -> np.ndarray:
    """Build an auxiliary skin mask by thresholding the HSV channels.

    A pixel is kept when its hue is in [0, 50], its saturation is within
    20%-80% of 255 and its value is at least 20% of 255. The result is cleaned
    with a morphological open/close pass.
    """
    # NOTE(review): for 8-bit images OpenCV stores hue as degrees / 2 (0-179),
    # so an upper bound of 50 spans 0-100 degrees -- confirm this is intended.
    hue_bounds = (0, 50)
    sat_bounds = (int(0.20 * 255), int(0.80 * 255))
    val_bounds = (int(0.20 * 255), 255)

    hue, sat, val = cv2.split(cv2.cvtColor(image_rgb, cv2.COLOR_RGB2HSV))

    in_hue = cv2.inRange(hue, hue_bounds[0], hue_bounds[1])
    in_sat = cv2.inRange(sat, sat_bounds[0], sat_bounds[1])
    in_val = cv2.inRange(val, val_bounds[0], val_bounds[1])

    hue_and_sat = cv2.bitwise_and(in_hue, in_sat)
    raw = cv2.bitwise_and(hue_and_sat, in_val)
    return _binary_open_close(raw, kernel_size=5, iterations=1)
|
|
|
|
|
|
|
|
def _combine_masks(mask1: np.ndarray, mask2: np.ndarray) -> np.ndarray:
    """Intersect two masks with a bitwise AND.

    If either mask is None the other one is returned as-is (which may itself
    be None when both are missing).
    """
    if mask1 is None or mask2 is None:
        return mask2 if mask1 is None else mask1
    return cv2.bitwise_and(mask1, mask2)
|
|
|
|
|
|
|
|
def _compute_skin_color_hex(image_rgb: np.ndarray, mask: np.ndarray) -> Tuple[str, np.ndarray]:
    """Derive a representative skin color from the masked pixels.

    The per-channel median of all pixels where ``mask`` is non-zero is used,
    which reduces the influence of highlights and shadows.

    Returns:
        A tuple ``(hex_code, color)`` where ``hex_code`` is an upper-case
        ``#RRGGBB`` string and ``color`` is the median as a uint8 array.

    Raises:
        ValueError: If the mask is None or empty, or selects no pixels.
    """
    if mask is None or mask.size == 0:
        raise ValueError("Invalid mask for skin color computation")

    selected = mask.astype(bool)
    if not selected.any():
        raise ValueError("No skin pixels detected")

    # Boolean indexing flattens the selected pixels to one row per pixel.
    channel_medians = np.median(image_rgb[selected], axis=0)
    color = np.clip(channel_medians, 0, 255).astype(np.uint8)

    red, green, blue = (int(color[i]) for i in range(3))
    return f"#{red:02X}{green:02X}{blue:02X}", color
|
|
|
|
|
|
|
|
def _solid_color_image(color_rgb: np.ndarray, size: Tuple[int, int] = (160, 160)) -> np.ndarray:
    """Create a uint8 image filled with a single color.

    ``size`` is given as (width, height); the returned array has shape
    (height, width, 3).
    """
    n_cols, n_rows = size[0], size[1]
    canvas = np.empty((n_rows, n_cols, 3), dtype=np.uint8)
    # Broadcast the color across every pixel.
    canvas[...] = color_rgb
    return canvas
|
|
|
|
|
|
|
|
def detect_skin_tone(image: np.ndarray, center_focus: bool = True, use_face_detector: bool = False) -> Tuple[str, np.ndarray, np.ndarray]:
    """Estimate a representative skin tone for an image.

    Pipeline:
      1. Normalize the input to RGB uint8.
      2. Choose a region of interest: the detected face box (when
         ``use_face_detector`` is set and a face is found), otherwise the
         central 70% of the image (when ``center_focus`` is set), otherwise
         the whole image.
      3. Intersect the YCrCb and HSV skin masks for that region; if fewer than
         100 pixels remain, fall back to the YCrCb mask alone.
      4. If there are still fewer than 100 skin pixels, use the median color
         of a small central patch of the region instead.

    Args:
        image: Input image; anything accepted by ``_ensure_rgb_uint8``.
        center_focus: Restrict analysis to a central crop to avoid
            background/hands. Ignored when a face box is found.
        use_face_detector: Try MediaPipe face detection first.

    Returns:
        ``(hex_code, swatch, overlay)``: the ``#RRGGBB`` color, a solid swatch
        image of that color, and a debug image. The debug image is the input
        blended with the skin mask, or, in the patch fallback, the input with
        the sampled patch outlined.

    Raises:
        ValueError: If no image is provided or the image has zero area.
    """
    min_skin_pixels = 100

    rgb = _ensure_rgb_uint8(image)
    height, width = rgb.shape[:2]
    if height == 0 or width == 0:
        raise ValueError("Empty image provided")

    face_bbox: Optional[Tuple[int, int, int, int]] = None
    if use_face_detector:
        face_bbox = _detect_face_bbox_mediapipe(rgb)

    if face_bbox is not None:
        x1, y1, x2, y2 = face_bbox
    elif center_focus:
        x1, y1, x2, y2 = _central_crop_bbox(width, height, frac=0.7)
    else:
        x1, y1, x2, y2 = 0, 0, width, height
    central_rgb = rgb[y1:y2, x1:x2]

    # A crop of a very small image can be empty; analyze the whole image then.
    if central_rgb.size == 0:
        x1, y1, x2, y2 = 0, 0, width, height
        central_rgb = rgb

    mask_ycrcb = _skin_mask_ycrcb(central_rgb)
    mask_hsv = _skin_mask_hsv(central_rgb)
    combined_mask = _combine_masks(mask_ycrcb, mask_hsv)

    # The intersection can be overly strict; relax to the YCrCb mask alone.
    if np.count_nonzero(combined_mask) < min_skin_pixels:
        combined_mask = mask_ycrcb

    if np.count_nonzero(combined_mask) < min_skin_pixels:
        # Last resort: sample a small central patch regardless of skin masks.
        region_h, region_w = central_rgb.shape[:2]
        px1, py1, px2, py2 = _central_crop_bbox(region_w, region_h, frac=0.2)
        patch = central_rgb[py1:py2, px1:px2]
        if patch.size == 0:
            # Region too small for a 20% patch; sample the whole region so the
            # median is never taken over zero pixels.
            px1, py1, px2, py2 = 0, 0, region_w, region_h
            patch = central_rgb

        every_pixel = np.full(patch.shape[:2], 255, dtype=np.uint8)
        hex_code, median_color = _compute_skin_color_hex(patch, every_pixel)
        swatch = _solid_color_image(median_color)

        debug_overlay = rgb.copy()
        # Patch coordinates are relative to the region; offset to image space.
        cv2.rectangle(debug_overlay, (x1 + px1, y1 + py1), (x1 + px2, y1 + py2), (255, 0, 0), 2)
        return hex_code, swatch, debug_overlay

    hex_code, color_rgb = _compute_skin_color_hex(central_rgb, combined_mask)
    swatch = _solid_color_image(color_rgb)

    # Paste the region mask back into a full-size canvas and blend it in.
    full_mask = np.zeros((height, width), dtype=np.uint8)
    full_mask[y1:y2, x1:x2] = combined_mask
    color_mask = cv2.cvtColor(full_mask, cv2.COLOR_GRAY2RGB)
    overlay = cv2.addWeighted(rgb, 0.8, color_mask, 0.2, 0)

    return hex_code, swatch, overlay
|
|
|
|
|
|
|
|
def _hex_html(hex_code: str) -> str:
    """Render a hex color as an HTML row: a small color chip plus the code text.

    ``hex_code`` is inserted verbatim both as the chip's CSS background and as
    the visible monospace label.
    """
    row_css = "display:flex;align-items:center;gap:12px;padding:8px 0;"
    chip_css = (
        "width:20px;height:20px;border-radius:4px;"
        f"background:{hex_code};"
        "border:1px solid #ccc;"
    )
    pieces = [
        f"<div style='{row_css}'>",
        f"<div style='{chip_css}'></div>",
        f"<span style='font-family:monospace;font-size:16px'>{hex_code}</span>",
        "</div>",
    ]
    return "".join(pieces)
|
|
|
|
|
|
|
|
# Gradio UI: an input column (image + options + button) next to an output
# column (hex code, color swatch, debug overlay).
with gr.Blocks(title="Skin Tone Detector") as demo:
    gr.Markdown(
        """
        ### Skin Tone Hex Detector
        Upload a face image. The app estimates a representative skin tone and returns a HEX color.
        """
    )

    with gr.Row():
        with gr.Column():
            input_image = gr.Image(
                label="Upload face image",
                type="numpy",
                image_mode="RGB",
                height=360,
            )
            center_focus = gr.Checkbox(value=True, label="Center focus (ignore edges)")
            # Face detection defaults to on only when MediaPipe was importable.
            use_face_det = gr.Checkbox(value=bool(HAS_MEDIAPIPE), label="Use face detection (MediaPipe)")
            run_btn = gr.Button("Detect Skin Tone", variant="primary")

        with gr.Column():
            hex_output = gr.HTML(label="HEX Color")
            swatch_output = gr.Image(label="Color Swatch", type="numpy")
            debug_output = gr.Image(label="Mask Overlay", type="numpy")
            if not HAS_MEDIAPIPE:
                gr.Markdown("MediaPipe not installed or unavailable. Face detection toggle will be ignored.")

    def _run(image: Optional[np.ndarray], center_focus: bool, use_face_det_flag: bool):
        """Gradio callback: return (hex HTML, swatch image, debug overlay)."""
        if image is None:
            # Nothing uploaded (or the image was cleared): show a black placeholder.
            placeholder = np.zeros((160, 160, 3), dtype=np.uint8)
            return _hex_html("#000000"), placeholder, None

        detector_enabled = use_face_det_flag and HAS_MEDIAPIPE
        hex_code, swatch, debug = detect_skin_tone(
            image,
            center_focus=center_focus,
            use_face_detector=detector_enabled,
        )
        return _hex_html(hex_code), swatch, debug

    # The button click and any change of the input image run the same callback.
    callback_inputs = [input_image, center_focus, use_face_det]
    callback_outputs = [hex_output, swatch_output, debug_output]
    run_btn.click(_run, inputs=callback_inputs, outputs=callback_outputs)
    input_image.change(_run, inputs=callback_inputs, outputs=callback_outputs)


if __name__ == "__main__":
    demo.launch()
|
|
|
|
|
|
|
|
|