"""Gradio front end that sends text or spoken input to a text-generation API.

The user may type a message or record audio. Audio is transcribed with
``speech_recognition``, the resulting text is posted to ``API_URL``, and the
reply is shown as text and synthesised to an audio file with ``pyttsx3``.
"""
import os

import gradio as gr
import pyttsx3
import requests
import speech_recognition as sr

# SECURITY NOTE: the key is read from the GEMINI_API_KEY environment variable
# when it is set. The literal fallback is kept only so existing deployments
# keep working; a key committed to source control should be rotated and the
# fallback removed.
API_KEY = os.environ.get(
    "GEMINI_API_KEY",
    "AIzaSyAm0RSpUKY38494Fug8SPIpdHLXE2d3cps",
)

# NOTE(review): Google API keys are normally sent via the ``x-goog-api-key``
# header or a ``key`` query parameter, and the public Gemini REST method is
# ``generateContent``. This URL, the Bearer header, the payload shape and the
# ``generated_text`` field below look out of date -- confirm against the
# official API reference before relying on them.
API_URL = "https://generativelanguage.googleapis.com/v1alpha2/models/gemini2:generateText"

# Upper bound (seconds) on the HTTP call so a stalled connection cannot hang
# the UI handler forever.
REQUEST_TIMEOUT_SECONDS = 30

# Messages reported when speech recognition fails.
_ASR_UNINTELLIGIBLE_MSG = "Could not understand audio"
_ASR_REQUEST_ERROR_MSG = "Request error with the recognition service"


def call_gemini_api(message):
    """Post *message* to ``API_URL`` and return the generated text.

    Args:
        message: Prompt text to send.

    Returns:
        The ``generated_text`` field of the JSON reply, ``"No response text"``
        when that field is absent, or a human-readable ``"Error..."`` string
        when the request fails or the server answers with a non-200 status.
        This function reports failures through its return value rather than
        by raising.
    """
    headers = {
        "Authorization": f"Bearer {API_KEY}",
        "Content-Type": "application/json",
    }
    payload = {
        "prompt": message,
        "max_output_tokens": 100,
    }

    try:
        response = requests.post(
            API_URL,
            headers=headers,
            json=payload,
            timeout=REQUEST_TIMEOUT_SECONDS,
        )
    except requests.RequestException as exc:
        return f"Error occurred while calling API: {exc}"

    if response.status_code != 200:
        return f"Error: {response.status_code}, {response.text}"

    try:
        body = response.json()
    except ValueError as exc:
        # A 200 reply whose body is not valid JSON.
        return f"Error occurred while calling API: {exc}"

    if not isinstance(body, dict):
        # A JSON array/scalar has no ``generated_text`` key to look up.
        return "No response text"
    return body.get("generated_text", "No response text")


def text_to_speech(text):
    """Synthesise *text* to an audio file and return the file name.

    Args:
        text: The text to speak.

    Returns:
        The path of the written file (``"response.mp3"``), or ``None`` when
        the TTS engine raised.
    """
    # NOTE(review): the on-disk audio format is chosen by the pyttsx3 driver;
    # presumably it is not real MP3 despite the extension -- confirm. The
    # fixed name also means each call overwrites the previous output.
    audio_filename = "response.mp3"
    try:
        engine = pyttsx3.init()
        engine.save_to_file(text, audio_filename)
        engine.runAndWait()
    except Exception as e:
        # Best effort: a TTS failure must not break the text response.
        print(f"Error with TTS: {e}")
        return None
    return audio_filename


def _transcribe(audio_path):
    """Return the transcript of the audio file at *audio_path*.

    Raises:
        sr.UnknownValueError: The speech could not be understood.
        sr.RequestError: The recognition service request failed.
    """
    recognizer = sr.Recognizer()
    with sr.AudioFile(audio_path) as source:
        audio_data = recognizer.record(source)
    return recognizer.recognize_google(audio_data)


def audio_to_text(audio_path):
    """Transcribe the audio file at *audio_path* to text.

    Args:
        audio_path: Path of an audio file readable by ``sr.AudioFile``.

    Returns:
        The recognised text, or a fixed error message when the speech could
        not be understood or the recognition service request failed.
    """
    try:
        return _transcribe(audio_path)
    except sr.UnknownValueError:
        return _ASR_UNINTELLIGIBLE_MSG
    except sr.RequestError:
        return _ASR_REQUEST_ERROR_MSG


def respond(text_input=None, audio_input=None):
    """Gradio handler: answer typed or spoken input with text and audio.

    Audio input, when present, takes precedence over typed text.

    Args:
        text_input: Message typed by the user, if any.
        audio_input: Path to the recorded/uploaded audio file, if any.

    Returns:
        A ``(response_text, audio_path)`` tuple. ``audio_path`` is ``None``
        when there was no usable input, when transcription failed, or when
        speech synthesis failed.
    """
    if audio_input:
        # Report recognition failures to the user directly instead of
        # forwarding the failure message to the API as if it were a prompt.
        try:
            text_input = _transcribe(audio_input)
        except sr.UnknownValueError:
            return _ASR_UNINTELLIGIBLE_MSG, None
        except sr.RequestError:
            return _ASR_REQUEST_ERROR_MSG, None

    if not text_input or not text_input.strip():
        return "Error: No input provided.", None

    api_response = call_gemini_api(text_input)
    audio_response = text_to_speech(api_response)
    return api_response, audio_response


# Gradio Interface setup
demo = gr.Interface(
    fn=respond,
    inputs=[
        gr.Textbox(label="Text Input", placeholder="Enter your message..."),
        gr.Audio(type="filepath", label="Audio Input"),
    ],
    outputs=[
        gr.Textbox(label="Response Text"),
        gr.Audio(label="Response Audio"),
    ],
)

if __name__ == "__main__":
    demo.launch(debug=True)