import os
import tempfile

import gradio as gr
import pyttsx3
import requests
import speech_recognition as sr

# Replace with your Gemini API Key and endpoint
API_KEY = "AIzaSyAm0RSpUKY38494Fug8SPIpdHLXE2d3cps"  # Replace with your actual API key
API_URL = "https://generativelanguage.googleapis.com/v1alpha2/models/gemini2:generateText"  # Gemini API URL

# Function to call Gemini API
def call_gemini_api(message):
    headers = {
        "Authorization": f"Bearer {API_KEY}",
        "Content-Type": "application/json"
    }
    payload = {
        "prompt": message,
        "max_output_tokens": 100
    }
    try:
        # Sending request to Gemini API
        response = requests.post(API_URL, headers=headers, json=payload)
        if response.status_code == 200:
            return response.json().get("generated_text", "No response text")
        else:
            return f"Error: {response.status_code}, {response.text}"
    except Exception as e:
        return f"Error occurred while calling API: {str(e)}"

# Convert text to speech (TTS)
def text_to_speech(text):
    try:
        engine = pyttsx3.init()
        audio_filename = "response.mp3"
        engine.save_to_file(text, audio_filename)
        engine.runAndWait()
        return audio_filename
    except Exception as e:
        print(f"Error with TTS: {e}")
        return None

# Convert audio to text (ASR)
def audio_to_text(audio_path):
    recognizer = sr.Recognizer()
    with sr.AudioFile(audio_path) as source:
        audio_data = recognizer.record(source)
    try:
        return recognizer.recognize_google(audio_data)
    except sr.UnknownValueError:
        return "Could not understand audio"
    except sr.RequestError:
        return "Request error with the recognition service"

# Define function for Gradio interface
def respond(text_input=None, audio_input=None):
    if audio_input:
        # If audio input is provided, convert it to text
        text_input = audio_to_text(audio_input)
    
    if not text_input:
        return "Error: No input provided.", None
    
    # Call Gemini API with text input and get response
    api_response = call_gemini_api(text_input)
    
    # Convert the API response text into audio
    audio_response = text_to_speech(api_response)
    
    return api_response, audio_response

# Gradio Interface setup
demo = gr.Interface(
    fn=respond,
    inputs=[
        gr.Textbox(label="Text Input", placeholder="Enter your message..."),
        gr.Audio(type="filepath", label="Audio Input")
    ],
    outputs=[
        gr.Textbox(label="Response Text"),
        gr.Audio(label="Response Audio")
    ]
)

if __name__ == "__main__":
    demo.launch(debug=True)