# NOTE: "Spaces: Runtime error" -- page-status text captured along with this
# file; it is not part of the module.
"""Real-time Speech Interface.

This module provides a real-time speech interface using Google's Gemini model.
It handles bidirectional audio streaming with automatic speech recognition and synthesis.

Important:
    Use headphones to prevent audio feedback and echo issues.
"""
import argparse
import asyncio
import json
import logging
import os
import traceback
from typing import Optional

from helpers.loop import AudioLoop, TextLoop
from helpers.session import Session
from models import AudioConfig, ModelConfig
from tools import TOOLS
# Logging setup: timestamp, logger name, level and message on every record.
_LOG_FORMAT = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"

logging.basicConfig(format=_LOG_FORMAT, level=logging.INFO)

# Module-level logger used throughout this file.
logger = logging.getLogger(__name__)
def main(
    modality: str = "text",
    system_prompt: Optional[str] = None,
    instruction_audio: Optional[str] = None,
) -> None:
    """Build the model configuration and run the matching conversation loop.

    Args:
        modality: Response modality, ``"text"`` or ``"audio"``. The
            upper-cased value is passed as ``response_modalities`` in the
            generation config, and it selects ``TextLoop`` or ``AudioLoop``.
        system_prompt: System instruction forwarded to ``ModelConfig``.
        instruction_audio: Forwarded to ``AudioLoop``; only used when
            ``modality`` is ``"audio"``.

    Exceptions are not propagated to the caller. ``KeyboardInterrupt`` is
    logged at INFO level; any other exception (including the ``ValueError``
    raised for an unsupported modality) is logged at ERROR level together
    with its traceback.
    """
    try:
        model_config = ModelConfig(
            # The API key is read from the environment; it is None when the
            # variable is unset.
            api_key=os.environ.get("GOOGLE_API_KEY"),
            name="models/gemini-2.0-flash-exp",
            system_instruction=system_prompt,
            tools=TOOLS,
            generation_config={
                "response_modalities": modality.upper(),
            },
        )

        if modality == "audio":
            loop_instance = AudioLoop(
                audio_config=AudioConfig(),
                model_config=model_config,
                instruction_audio=instruction_audio,
            )
        elif modality == "text":
            loop_instance = TextLoop(model_config=model_config)
        else:
            raise ValueError("Invalid modality")

        # debug=True runs the event loop in asyncio debug mode.
        asyncio.run(loop_instance.run(), debug=True)
    except KeyboardInterrupt:
        logger.info("Application terminated by user")
    except Exception as e:
        # logger.exception logs at ERROR level and appends the traceback.
        # The previous logger.debug(traceback) call was filtered out by the
        # INFO log level, so failures were reported without any traceback.
        logger.exception("Application error: %s", e)
if __name__ == "__main__":
    # Command-line entry point: parse arguments, build the system prompt
    # from the questions file, then hand over to main().
    parser = argparse.ArgumentParser(description="Real-time Speech Interface")
    parser.add_argument(
        "-m",
        "--modality",
        choices=["text", "audio"],
        help="Response modality",
        required=True,
    )
    parser.add_argument(
        "--instruction-audio",
        type=str,
        help="Path to audio instructions (.wav file)",
        required=False,
    )
    parser.add_argument(
        "-q",
        "--questions",
        type=str,
        help="Path to JSON file containing questions",
        required=True,
    )
    args = parser.parse_args()

    # Read the JSON as UTF-8 explicitly instead of relying on the platform's
    # default locale encoding, which can mis-decode non-ASCII questions.
    with open(args.questions, "r", encoding="utf-8") as f:
        questions_dict = json.load(f)

    session = Session(questions=questions_dict)
    # NOTE: the template path is relative to the current working directory.
    system_prompt = session.zero_shot_prompt("src/prompts/default_prompt.jinja2")
    print(system_prompt)

    main(
        modality=args.modality,
        system_prompt=system_prompt,
        instruction_audio=args.instruction_audio,
    )