import streamlit as st
import tensorflow as tf
import numpy as np
from tensorflow.keras.preprocessing.image import img_to_array
from PIL import Image
from gtts import gTTS
import tempfile
import base64

# Load the trained model
MODEL_PATH = "image_model.h5"
model = tf.keras.models.load_model(MODEL_PATH)

# Image dimensions expected by the model
IMG_WIDTH, IMG_HEIGHT = 64, 48

# Class labels (spoken/displayed descriptions for each predicted class index)
CLASS_LABELS = {
    0: "The person in the uploaded image is driving safely",
    1: "The person in the uploaded image is texting with the right hand and is thus distracted",
    2: "The person in the uploaded image is talking on the phone with the right hand and is thus distracted",
    3: "The person in the uploaded image is texting with the left hand and is thus distracted",
    4: "The person in the uploaded image is talking on the phone with the left hand and is thus distracted",
    5: "The person in the uploaded image is operating the radio and is thus distracted",
    6: "The person in the uploaded image is drinking and is thus distracted",
    7: "The person in the uploaded image is reaching behind and is thus distracted",
    8: "The person in the uploaded image is doing hair and makeup and is thus distracted",
    9: "The person in the uploaded image is talking to a passenger and is thus distracted",
}


def predict_image(image):
    """Run the model on a resized PIL image and return (label, confidence)."""
    img_array = img_to_array(image)
    img_array = np.expand_dims(img_array, axis=0)  # Add batch dimension
    img_array = img_array / 255.0                  # Scale pixel values to [0, 1]
    predictions = model.predict(img_array)
    predicted_class = np.argmax(predictions, axis=1)[0]
    confidence = np.max(predictions)
    return CLASS_LABELS[predicted_class], confidence


def speak_auto(text):
    """Convert the prediction text to speech and auto-play it in the browser."""
    tts = gTTS(text=text, lang='en')
    # Write the MP3 to a temporary file, then read it back as raw bytes
    with tempfile.NamedTemporaryFile(delete=True, suffix=".mp3") as fp:
        tts.save(fp.name)
        fp.seek(0)
        audio_bytes = fp.read()
    # Embed the audio as a base64 data URI inside an autoplaying <audio> element
    b64 = base64.b64encode(audio_bytes).decode()
    audio_html = f"""
        <audio autoplay="true">
            <source src="data:audio/mp3;base64,{b64}" type="audio/mp3">
        </audio>
    """
    st.markdown(audio_html, unsafe_allow_html=True)


# Streamlit app
st.title("Driver Distraction Detection")
st.markdown("Team18 Image Project: Sayandip Bhattacharyya, Purnendu Rudrapal, Sridatta Das, Sidhartha Karjee")

uploaded_file = st.file_uploader("Choose an image file", type=["jpg", "jpeg", "png"])

if uploaded_file is not None:
    image = Image.open(uploaded_file).convert("RGB")
    resized_image = image.resize((IMG_WIDTH, IMG_HEIGHT))

    st.image(image, caption="Uploaded Image", use_container_width=True)

    with st.spinner("Predicting..."):
        predicted_class, confidence = predict_image(resized_image)

    prediction_text = f"{predicted_class}. Prediction confidence: {confidence:.2%}"
    st.subheader("Prediction")
    st.write(prediction_text)
    speak_auto(prediction_text)  # Auto-play speech
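
# Usage sketch (the filename app.py is an assumption, not given in the source):
#   streamlit run app.py
# The app expects image_model.h5 to be present in the working directory.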