import os
import tempfile
import asyncio
from typing import Optional, Dict, Any

import uvicorn
from fastapi import FastAPI, File, UploadFile, HTTPException, Depends
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse
from pydantic import BaseModel
from groq import Groq
from dotenv import load_dotenv
import librosa
import soundfile as sf

from main2 import UnifiedAudioAnalyzer, summarize_audio_analysis_with_llm

# Load environment variables (e.g. GROQ_API_KEY) from a local .env file.
load_dotenv()

app = FastAPI(title="Audio Analysis API", version="1.0.0")

# CORS middleware: allow the local frontend dev servers to call this API.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["http://localhost:9002", "http://localhost:3000"],  # Frontend URLs
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Single shared analyzer instance; constructed once at import time.
analyzer = UnifiedAudioAnalyzer(enable_parallel_processing=True)

# Groq client for the /chat endpoint. Optional: if the key is missing or
# initialization fails, /upload still works and /chat reports a 500.
groq_client = None
try:
    groq_api_key = os.getenv("GROQ_API_KEY")
    if groq_api_key:
        groq_client = Groq(api_key=groq_api_key)
except Exception as e:
    print(f"Warning: Could not initialize Groq client: {e}")


# --- Pydantic models -------------------------------------------------------

class ChatRequest(BaseModel):
    # Free-form user question plus the analysis payload previously returned
    # by /upload, echoed back so the LLM has context.
    question: str
    analysis_data: Dict[str, Any]


class ChatResponse(BaseModel):
    answer: str


class AnalysisResponse(BaseModel):
    success: bool
    data: Optional[Dict[str, Any]] = None
    error: Optional[str] = None


def convert_audio_to_wav(audio_file_path: str, target_sr: int = 16000) -> str:
    """Convert an audio file to a temporary mono WAV file.

    ``librosa.load`` decodes many input formats and resamples to
    ``target_sr``. The caller owns the returned file and must delete it.

    Args:
        audio_file_path: Path to the input audio file.
        target_sr: Sample rate to resample to (default 16000, the rate the
            original implementation hard-coded).

    Returns:
        Path to the newly created temporary ``.wav`` file.

    Raises:
        HTTPException: 400 if the file cannot be decoded or written.
    """
    temp_wav_path = None
    try:
        audio_data, sample_rate = librosa.load(audio_file_path, sr=target_sr)

        # Create the temp file first so we have a stable path to write to.
        temp_wav = tempfile.NamedTemporaryFile(suffix='.wav', delete=False)
        temp_wav_path = temp_wav.name
        temp_wav.close()

        sf.write(temp_wav_path, audio_data, sample_rate)
        return temp_wav_path
    except Exception as e:
        # Bug fix: don't leak the temp file when decoding/writing fails
        # after the file was already created.
        if temp_wav_path is not None:
            try:
                os.unlink(temp_wav_path)
            except OSError:
                pass
        raise HTTPException(status_code=400, detail=f"Error converting audio to WAV: {str(e)}")


@app.get("/")
async def root():
    """Liveness / landing route."""
    return {"message": "Audio Analysis API is running"}
@app.post("/upload", response_model=AnalysisResponse)
async def upload_audio(file: UploadFile = File(...)):
    """Upload an audio file, run the analysis pipeline, and return results.

    The uploaded bytes are spooled to a temp file, converted to WAV via
    ``convert_audio_to_wav``, analyzed by the shared ``analyzer``, and
    (best-effort) summarized by the LLM. Temp files are always cleaned up.

    Raises:
        HTTPException: 400 for a non-audio upload or undecodable audio,
            500 when the analyzer returns nothing — re-raised so the client
            sees the real status code instead of a 200 with success=False.
    """
    # Bug fix: content_type can be None for some clients — guard before
    # calling startswith, which previously raised AttributeError.
    if not file.content_type or not file.content_type.startswith("audio/"):
        raise HTTPException(status_code=400, detail="File must be an audio file")

    # Bug fix: preserve the original extension only when one exists;
    # previously `file.filename.split('.')[-1]` broke on None / no-dot names.
    suffix = ".bin"
    if file.filename and "." in file.filename:
        suffix = f".{file.filename.rsplit('.', 1)[-1]}"

    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_file:
        content = await file.read()
        temp_file.write(content)
        temp_file_path = temp_file.name

    wav_file_path = None
    try:
        # Convert to WAV if needed (raises HTTPException(400) on bad audio).
        wav_file_path = convert_audio_to_wav(temp_file_path)

        analysis_results = analyzer.analyze_complete_audio(wav_file_path)
        if not analysis_results:
            raise HTTPException(status_code=500, detail="Analysis failed")

        # LLM summary is best-effort: its failure must not sink the analysis.
        try:
            analysis_results['llm_summary'] = summarize_audio_analysis_with_llm(analysis_results)
        except Exception as e:
            print(f"Warning: LLM summary failed: {e}")
            analysis_results['llm_summary'] = "Summary generation failed"

        return AnalysisResponse(success=True, data=analysis_results)
    except HTTPException:
        # Bug fix: previously HTTPExceptions were swallowed by the broad
        # handler below and returned as HTTP 200 with success=False,
        # discarding the 400/500 status. Re-raise to keep the status code.
        raise
    except Exception as e:
        return AnalysisResponse(success=False, error=str(e))
    finally:
        # Clean up temp files regardless of outcome; unlink each
        # independently so one failure doesn't skip the other.
        for path in (temp_file_path, wav_file_path):
            if path:
                try:
                    os.unlink(path)
                except OSError:
                    pass


@app.post("/chat", response_model=ChatResponse)
async def chat_with_analysis(request: ChatRequest):
    """Answer a user question about previously returned analysis results.

    Builds a textual context from the echoed-back analysis payload and asks
    the Groq LLM. Returns the model's answer verbatim (stripped).

    Raises:
        HTTPException: 500 if the Groq client is not configured or the
            chat completion fails.
    """
    if not groq_client:
        raise HTTPException(status_code=500, detail="Groq API not configured")

    try:
        # Flatten the relevant analysis sections into a single context block.
        # .get() defaults keep this robust against missing sections.
        context = f"""
        Audio Analysis Summary:
        - File: {request.analysis_data.get('file_info', {}).get('filename', 'Unknown')}
        - Duration: {request.analysis_data.get('file_info', {}).get('duration', 0):.2f} seconds
        - LLM Summary: {request.analysis_data.get('llm_summary', 'No summary available')}

        Speaker Diarization:
        {request.analysis_data.get('diarization_transcription', [])}

        Audio Events:
        {request.analysis_data.get('audio_events', {}).get('top_events', [])}

        Emotion Analysis:
        {request.analysis_data.get('emotion_analysis', {})}

        Paralinguistic Features:
        {request.analysis_data.get('paralinguistic_features', {})}
        """

        response = groq_client.chat.completions.create(
            model="llama-3.1-8b-instant",  # Using smaller model as requested
            messages=[
                {
                    "role": "system",
                    "content": "You are an expert audio analyst. Answer questions about the provided audio analysis data. Be helpful and provide insights based on the analysis results."
                },
                {
                    "role": "user",
                    "content": f"Context: {context}\n\nQuestion: {request.question}"
                }
            ],
            temperature=0.7,
            max_tokens=1000
        )

        answer = response.choices[0].message.content.strip()
        return ChatResponse(answer=answer)
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Chat error: {str(e)}")


@app.get("/health")
async def health_check():
    """Health check endpoint for orchestration/monitoring probes."""
    return {"status": "healthy", "analyzer_loaded": analyzer is not None}


if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=7860)