# llava.py
"""Voice round-trip: audio in -> transcript -> Gemini reply -> speech out.

Importing this module loads variables from a ``.env`` file (via
``load_dotenv``) and configures the Generative AI client with the
``GENAI_API_KEY`` environment variable.
"""

import logging
import os

import google.generativeai as genai
from dotenv import load_dotenv

from speech_to_text import transcribe_audio
from text_to_speech import text_to_speech_file

logger = logging.getLogger(__name__)

load_dotenv()

# Configure the Generative AI model
GENAI_API_KEY = os.getenv("GENAI_API_KEY")
genai.configure(api_key=GENAI_API_KEY)
model = genai.GenerativeModel("gemini-1.5-flash")


def generate_response(prompt: str) -> str:
    """Send *prompt* to the configured Gemini model and return its text reply.

    Args:
        prompt: Text passed unchanged to ``model.generate_content``.

    Returns:
        The reply text, or an empty string when the response carries no
        usable text.

    Errors raised by ``model.generate_content`` itself are not caught here
    and propagate to the caller.
    """
    response = model.generate_content(prompt)
    try:
        # The ``.text`` accessor raises ValueError when the response has no
        # text part (e.g. the prompt was blocked or no candidate came back).
        return response.text
    except ValueError:
        logger.warning("Model response contained no text", exc_info=True)
        return ""


def main(audio_file: str) -> str:
    """Run the full pipeline for one audio input.

    Args:
        audio_file: Passed unchanged to ``transcribe_audio``
            (presumably a path to an audio file -- confirm against
            ``speech_to_text``).

    Returns:
        Whatever ``text_to_speech_file`` returns for the model's reply
        (presumably the path of the generated audio -- confirm against
        ``text_to_speech``), or one of the fixed messages
        ``"Transcription failed."`` / ``"Failed to generate response."``
        when the corresponding stage yields nothing.
    """
    # Transcribe audio to text
    transcript = transcribe_audio(audio_file)
    if not transcript:
        return "Transcription failed."

    # Generate response from the LLM
    response_text = generate_response(transcript)
    if not response_text:
        return "Failed to generate response."

    # Convert response text to speech
    return text_to_speech_file(response_text)