# !pip install gradio
### 1. Imports and class names setup ###
import gradio as gr
import os
import torch

from model import create_effnetb2_model
from timeit import default_timer as Timer
from typing import Tuple, Dict

# Labels the classifier can predict; predict() pairs them with the model outputs by index
class_names = ["pizza", "steak", "sushi"]

### 2. Model and transforms preparation ###

# Build the EffNetB2 model together with the transforms its inputs go through
effnetb2, effnetb2_transforms = create_effnetb2_model()

# Restore the saved weights, mapping every stored tensor onto the CPU
effnetb2.load_state_dict(
    torch.load(
        f="09_pretrained_effneb2_feature_extractor_pizza_steak_sushi_20_percent.pth",
        map_location=torch.device("cpu"),
    )
)

### 3. Predict function ###
# Create predict function
def predict(img) -> Tuple[Dict, float]:
  """Transform an image, run a prediction on it and time the whole call.

  Args:
    img: Image to classify. It is handed directly to `effnetb2_transforms`;
      the Gradio input in this file is configured with `type="pil"`, so a
      PIL image is what arrives here.

  Returns:
    A tuple `(pred_labels_and_probs, pred_time)`:
      * `pred_labels_and_probs` maps every name in `class_names` to its
        softmax probability as a Python float (the dictionary format
        Gradio's Label output consumes).
      * `pred_time` is the elapsed time in seconds, rounded to 5 decimals.
  """
  # NOTE: this file imports `default_timer` under the alias `Timer`; the
  # lowercase name `timer` is not defined and raised NameError on every call.
  start_time = Timer()

  # Transform the target image and add a batch dimension
  img = effnetb2_transforms(img).unsqueeze(0)

  # Put the model into evaluation mode and turn on inference mode
  effnetb2.eval()
  with torch.inference_mode():
    # Forward pass, then turn the prediction logits into prediction probabilities
    pred_probs = torch.softmax(effnetb2(img), dim=1)

  # Only one image was passed in, so row 0 holds its per-class probabilities
  probs = pred_probs[0]

  # Pair each class name with the probability at the same index
  pred_labels_and_probs = {name: float(probs[i]) for i, name in enumerate(class_names)}

  # Calculate the prediction time
  pred_time = round(Timer() - start_time, 5)

  # Return the prediction dictionary and prediction time
  return pred_labels_and_probs, pred_time

### 4. Gradio app ###

# User-facing text for the Gradio interface: a heading, a one-line summary and
# a longer Markdown-formatted article
title = "FoodVision Mini 🍕🥩🍣"
description = "An EfficientNetB2 feature extractor computer vision model to classify images of food as pizza, steak or sushi."
article = """
        ## 🍽️ FoodVision Mini: Classifying Pizza, Steak, and Sushi with Deep Learning

        FoodVision Mini is a compact computer vision demo built using a pre-trained **EfficientNetB2** model. It classifies food images into one of three categories: **pizza**, **steak**, or **sushi**.

        ### 🔍 How It Works
        - The model uses EfficientNetB2 as a **feature extractor**.
        - It predicts the top 3 most likely classes for any input image.
        - Built with **Gradio**, the interface allows easy interaction for testing the model.

        ### ⚙️ Model Details
        - **Model Architecture**: EfficientNetB2 (from `torchvision.models`)
        - **Prediction Output**: Top 3 class probabilities
        - **Use Case**: Great for food classification demos.

        ### 🚀 Try It Out
        Upload an image of food (pizza, steak, or sushi) or use one of the provided examples to see predictions and inference time.
      """
# Gather the example inputs: one single-item list per entry found in the 'examples/' directory
example_list = [[f"examples/{file_name}"] for file_name in os.listdir("examples")]

# Wire the predict function into a Gradio interface
demo = gr.Interface(
    fn=predict,  # function that maps the input image to the two outputs below
    inputs=gr.Image(type="pil"),  # the image is handed to predict() as a PIL image
    outputs=[
        gr.Label(num_top_classes=3, label="Predictions"),  # first value returned by predict()
        gr.Number(label="Prediction time(s)"),  # second value returned by predict()
    ],
    examples=example_list,
    title=title,
    description=description,
    article=article,
)

# Start serving the demo
demo.launch()