import gradio as gr
import torch
from ultralytics import YOLO
from PIL import Image

def load_model():
    """
    Build the YOLOv8x segmentation model and move it to the compute device.

    CUDA is used when ``torch.cuda.is_available()`` reports a GPU; otherwise
    the model stays on the CPU. No precision settings are changed here.

    Returns:
        A ``(model, device)`` tuple: the ``YOLO`` instance and the device
        string (``"cuda"`` or ``"cpu"``) it was moved to.
    """
    # Prefer the GPU, fall back to the CPU.
    if torch.cuda.is_available():
        target = "cuda"
    else:
        target = "cpu"

    # The "-seg" checkpoint is the segmentation variant of YOLOv8x.
    net = YOLO('yolov8x-seg.pt')
    net = net.to(target)
    return net, target

# Load the model once at import time; count_persons() reuses this instance on every call.
model, device = load_model()

def count_persons(image: Image.Image) -> str:
    """
    Count the people detected in an uploaded image.

    Runs the module-level YOLO ``model`` on ``image`` with detections
    restricted to class 0 (person) and reports how many boxes came back.

    Args:
        image: The uploaded picture as a PIL image. ``None`` is also
            accepted, because that is what the Gradio image input delivers
            when the form is submitted without an upload.

    Returns:
        ``"Persons detected: <n>"`` for a real image, or
        ``"No image provided."`` when ``image`` is ``None``.
    """
    # Guard against an empty submission. Without it, predict(source=None)
    # does not fail: Ultralytics substitutes its own default sample source
    # and the user would be shown a count for an unrelated image.
    if image is None:
        return "No image provided."

    results = model.predict(
        source=image,
        conf=0.6,          # Confidence threshold
        imgsz=640,         # Inference resolution
        # NOTE(review): test-time augmentation may be unsupported for "-seg"
        # models (ignored with a warning, or rejected, depending on the
        # Ultralytics version) -- confirm against the installed release.
        augment=True,
        max_det=300,       # Cap detections for crowded scenes
        classes=[0],       # Only detect persons (class 0)
    )

    # predict() returns one result per input image; add up their box counts.
    total = sum(len(r.boxes) for r in results)
    return f"Persons detected: {total}"

# Web UI: a single image upload feeds count_persons(), whose text result is
# shown in a text box.
demo = gr.Interface(
    title="Advanced Person Counter with YOLOv8",
    description=(
        "Upload an image to count people using a "
        "state‑of‑the‑art YOLOv8 segmentation model "
        "with Test‑Time Augmentation."
    ),
    fn=count_persons,
    inputs=gr.Image(type="pil", label="Upload Image"),
    outputs=gr.Text(label="Person Count"),
    # No example images are bundled. To offer some, add one single-item list
    # per file path, e.g. ["examples/crowd1.jpg"].
    examples=[],
)

# Start the Gradio server only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    demo.launch()  # Launch locally; add `share=True` for a public link