# NOTE(review): the following lines are HuggingFace Spaces page residue from a web
# scrape ("Spaces: Sleeping"); kept here as a comment so the file parses.
# Spaces: Sleeping / Sleeping
# Standard library
import os
import tempfile
import asyncio

# Third-party
from fastapi import FastAPI, File, UploadFile, HTTPException, Depends
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse
from pydantic import BaseModel
from typing import Optional, Dict, Any
import uvicorn
from groq import Groq
from dotenv import load_dotenv
import librosa
import soundfile as sf

# Local application
from main2 import UnifiedAudioAnalyzer, summarize_audio_analysis_with_llm

# Load environment variables (e.g. GROQ_API_KEY) from a .env file
load_dotenv()
app = FastAPI(title="Audio Analysis API", version="1.0.0")

# CORS: allow the local frontend dev servers to call this API from the browser
app.add_middleware(
    CORSMiddleware,
    allow_origins=["http://localhost:9002", "http://localhost:3000"],  # Frontend URLs
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Shared audio analyzer instance used by the upload endpoint
analyzer = UnifiedAudioAnalyzer(enable_parallel_processing=True)

# Groq client for the chat endpoint; stays None when GROQ_API_KEY is absent
# or initialization fails, and the chat endpoint then returns HTTP 500.
groq_client = None
try:
    groq_api_key = os.getenv("GROQ_API_KEY")
    if groq_api_key:
        groq_client = Groq(api_key=groq_api_key)
except Exception as e:
    print(f"Warning: Could not initialize Groq client: {e}")
# Pydantic request/response models
class ChatRequest(BaseModel):
    """Request body for the chat endpoint."""

    question: str                  # user's question about the analysis
    analysis_data: Dict[str, Any]  # full analysis payload previously returned by the upload endpoint
class ChatResponse(BaseModel):
    """Response body for the chat endpoint."""

    answer: str  # LLM-generated answer to the user's question
class AnalysisResponse(BaseModel):
    """Envelope for audio-analysis results: exactly one of data/error is set."""

    success: bool
    data: Optional[Dict[str, Any]] = None  # analysis results when success is True
    error: Optional[str] = None            # error message when success is False
def convert_audio_to_wav(audio_file_path: str) -> str:
    """Convert an audio file to a 16 kHz WAV file.

    Args:
        audio_file_path: Path to the input audio file (any format librosa can decode).

    Returns:
        Path to a newly created temporary WAV file. The caller is responsible
        for deleting it.

    Raises:
        HTTPException: 400 when the input cannot be decoded or the WAV cannot
            be written.
    """
    try:
        # librosa supports many container/codec formats and resamples to 16 kHz
        audio_data, sample_rate = librosa.load(audio_file_path, sr=16000)

        # Named temp file (delete=False) so downstream code can reopen it by path
        temp_wav = tempfile.NamedTemporaryFile(suffix='.wav', delete=False)
        temp_wav_path = temp_wav.name
        temp_wav.close()

        sf.write(temp_wav_path, audio_data, sample_rate)
        return temp_wav_path
    except Exception as e:
        # Chain the cause so the original decode/write error is preserved in tracebacks
        raise HTTPException(status_code=400, detail=f"Error converting audio to WAV: {str(e)}") from e
@app.get("/")  # NOTE(review): route decorator restored — it appears stripped by formatting; confirm path
async def root():
    """Liveness message for the API root."""
    return {"message": "Audio Analysis API is running"}
@app.post("/upload")  # NOTE(review): route decorator restored — stripped by formatting; confirm the path with the frontend
async def upload_audio(file: UploadFile = File(...)):
    """Upload and analyze an audio file.

    Saves the upload to a temporary file, converts it to WAV, runs the unified
    analyzer, and attaches a best-effort LLM summary. Temporary files are
    always cleaned up. Any failure is returned as a structured error payload
    rather than an HTTP error status.
    """
    try:
        # Reject non-audio uploads early. content_type can be None for some
        # clients, so guard against AttributeError.
        if not (file.content_type or "").startswith("audio/"):
            raise HTTPException(status_code=400, detail="File must be an audio file")

        # Persist the upload to disk, keeping the original file extension
        with tempfile.NamedTemporaryFile(delete=False, suffix=f".{file.filename.split('.')[-1]}") as temp_file:
            content = await file.read()
            temp_file.write(content)
            temp_file_path = temp_file.name

        try:
            # Convert to WAV if needed (always produces a new temp WAV file)
            wav_file_path = convert_audio_to_wav(temp_file_path)

            analysis_results = analyzer.analyze_complete_audio(wav_file_path)
            if not analysis_results:
                raise HTTPException(status_code=500, detail="Analysis failed")

            # LLM summary is best-effort: analysis still succeeds when it fails
            try:
                analysis_results['llm_summary'] = summarize_audio_analysis_with_llm(analysis_results)
            except Exception as e:
                print(f"Warning: LLM summary failed: {e}")
                analysis_results['llm_summary'] = "Summary generation failed"

            return AnalysisResponse(success=True, data=analysis_results)
        finally:
            # Clean up temporary files regardless of outcome
            try:
                os.unlink(temp_file_path)
                if 'wav_file_path' in locals():
                    os.unlink(wav_file_path)
            except OSError:
                pass
    except Exception as e:
        # Includes HTTPExceptions raised above: every failure becomes a
        # structured AnalysisResponse rather than an HTTP error status.
        return AnalysisResponse(success=False, error=str(e))
@app.post("/chat")  # NOTE(review): route decorator restored — stripped by formatting; confirm the path with the frontend
async def chat_with_analysis(request: ChatRequest):
    """Chat with an LLM about previously computed analysis results.

    Raises:
        HTTPException: 500 when the Groq client is not configured or the
            chat completion fails.
    """
    if not groq_client:
        raise HTTPException(status_code=500, detail="Groq API not configured")

    try:
        # Flatten the analysis payload into a textual context for the LLM
        context = f"""
Audio Analysis Summary:
- File: {request.analysis_data.get('file_info', {}).get('filename', 'Unknown')}
- Duration: {request.analysis_data.get('file_info', {}).get('duration', 0):.2f} seconds
- LLM Summary: {request.analysis_data.get('llm_summary', 'No summary available')}

Speaker Diarization:
{request.analysis_data.get('diarization_transcription', [])}

Audio Events:
{request.analysis_data.get('audio_events', {}).get('top_events', [])}

Emotion Analysis:
{request.analysis_data.get('emotion_analysis', {})}

Paralinguistic Features:
{request.analysis_data.get('paralinguistic_features', {})}
"""

        response = groq_client.chat.completions.create(
            model="llama-3.1-8b-instant",  # Using smaller model as requested
            messages=[
                {
                    "role": "system",
                    "content": "You are an expert audio analyst. Answer questions about the provided audio analysis data. Be helpful and provide insights based on the analysis results."
                },
                {
                    "role": "user",
                    "content": f"Context: {context}\n\nQuestion: {request.question}"
                }
            ],
            temperature=0.7,
            max_tokens=1000
        )

        answer = response.choices[0].message.content.strip()
        return ChatResponse(answer=answer)
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Chat error: {str(e)}")
@app.get("/health")  # NOTE(review): route decorator restored — stripped by formatting; confirm path
async def health_check():
    """Health check endpoint: reports service and analyzer status."""
    return {"status": "healthy", "analyzer_loaded": analyzer is not None}
| if __name__ == "__main__": | |
| uvicorn.run(app, host="0.0.0.0", port=7860) | |