Spaces:
Sleeping
Sleeping
File size: 6,306 Bytes
783be15 59e3a97 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 |
import os
import tempfile
import asyncio
from fastapi import FastAPI, File, UploadFile, HTTPException, Depends
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse
from pydantic import BaseModel
from typing import Optional, Dict, Any
import uvicorn
from groq import Groq
from dotenv import load_dotenv
import librosa
import soundfile as sf
from main2 import UnifiedAudioAnalyzer, summarize_audio_analysis_with_llm
# Load environment variables (e.g. GROQ_API_KEY) from a local .env file.
load_dotenv()

app = FastAPI(title="Audio Analysis API", version="1.0.0")

# CORS middleware — allow the local dev frontends to call this API.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["http://localhost:9002", "http://localhost:3000"],  # Frontend URLs
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Initialize the audio analyzer once at import time; /health reports on it.
analyzer = UnifiedAudioAnalyzer(enable_parallel_processing=True)

# Groq client for chat. Kept best-effort: if the key is missing or the client
# fails to construct, the app still starts and /chat returns an error instead.
groq_client = None
try:
    groq_api_key = os.getenv("GROQ_API_KEY")
    if groq_api_key:
        groq_client = Groq(api_key=groq_api_key)
except Exception as e:
    print(f"Warning: Could not initialize Groq client: {e}")
# Pydantic models
class ChatRequest(BaseModel):
    """Request body for /chat: a user question plus the analysis data it refers to."""
    question: str
    analysis_data: Dict[str, Any]
class ChatResponse(BaseModel):
    """Response body for /chat: the model's answer text."""
    answer: str
class AnalysisResponse(BaseModel):
    """Response body for /upload: analysis results on success, error text otherwise."""
    success: bool
    data: Optional[Dict[str, Any]] = None
    error: Optional[str] = None
def convert_audio_to_wav(audio_file_path: str, target_sr: int = 16000) -> str:
    """Convert an audio file to a WAV file resampled to *target_sr*.

    Args:
        audio_file_path: Path to the source audio (any format librosa can decode).
        target_sr: Sample rate to resample to; defaults to 16000, which is what
            the analyzer pipeline expects.

    Returns:
        Path to a newly created temporary ``.wav`` file. The caller is
        responsible for deleting it.

    Raises:
        HTTPException: 400 if the audio cannot be decoded or written.
    """
    temp_wav_path = None
    try:
        # librosa handles many container formats and resamples in one step.
        audio_data, sample_rate = librosa.load(audio_file_path, sr=target_sr)

        # delete=False so the path outlives this function; the caller cleans up.
        with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as temp_wav:
            temp_wav_path = temp_wav.name

        sf.write(temp_wav_path, audio_data, sample_rate)
        return temp_wav_path
    except Exception as e:
        # Fix: don't leak the temp file when the write (or anything after the
        # file was created) fails — the original left it behind on error.
        if temp_wav_path is not None:
            try:
                os.unlink(temp_wav_path)
            except OSError:
                pass
        raise HTTPException(status_code=400, detail=f"Error converting audio to WAV: {str(e)}")
@app.get("/")
async def root():
    """Simple liveness message for the API root."""
    greeting = "Audio Analysis API is running"
    return {"message": greeting}
@app.post("/upload", response_model=AnalysisResponse)
async def upload_audio(file: UploadFile = File(...)):
    """Upload an audio file, analyze it, and attach a best-effort LLM summary.

    The upload is persisted to a temp file, converted to WAV, run through the
    analyzer, and summarized. All temp files are removed regardless of outcome.

    Raises:
        HTTPException: 400 for non-audio uploads, 500 if analysis produces
            no results.
    """
    try:
        # Fix: content_type can be None when the client omits it — the
        # original crashed with AttributeError instead of returning 400.
        if not (file.content_type or "").startswith("audio/"):
            raise HTTPException(status_code=400, detail="File must be an audio file")

        # Fix: use splitext instead of split('.')[-1], which mangled the
        # suffix for dotless filenames. Preserve the extension so decoders
        # can sniff the container format.
        suffix = os.path.splitext(file.filename or "")[1] or ".bin"
        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_file:
            temp_file.write(await file.read())
            temp_file_path = temp_file.name

        wav_file_path = None
        try:
            # Convert to WAV if needed
            wav_file_path = convert_audio_to_wav(temp_file_path)

            # Perform analysis
            analysis_results = analyzer.analyze_complete_audio(wav_file_path)
            if not analysis_results:
                raise HTTPException(status_code=500, detail="Analysis failed")

            # LLM summary is best-effort: a failure here must not discard
            # the analysis results that already succeeded.
            try:
                analysis_results['llm_summary'] = summarize_audio_analysis_with_llm(analysis_results)
            except Exception as e:
                print(f"Warning: LLM summary failed: {e}")
                analysis_results['llm_summary'] = "Summary generation failed"

            return AnalysisResponse(success=True, data=analysis_results)
        finally:
            # Clean up temporary files regardless of outcome.
            for path in (temp_file_path, wav_file_path):
                if path:
                    try:
                        os.unlink(path)
                    except OSError:
                        pass
    except HTTPException:
        # Fix: the original's broad `except Exception` swallowed the
        # deliberately raised 400/500 HTTPExceptions and returned them as an
        # HTTP 200 with success=False. Re-raise so FastAPI emits the intended
        # status codes.
        raise
    except Exception as e:
        return AnalysisResponse(success=False, error=str(e))
@app.post("/chat", response_model=ChatResponse)
async def chat_with_analysis(request: ChatRequest):
    """Answer a user question about previously computed analysis results via Groq.

    Builds a plain-text context from the analysis payload and sends it together
    with the question to the Groq chat-completions API.

    Raises:
        HTTPException: 500 if the Groq client is not configured or the API
            call fails.
    """
    if not groq_client:
        raise HTTPException(status_code=500, detail="Groq API not configured")
    try:
        file_info = request.analysis_data.get('file_info', {})
        # Flatten the relevant analysis sections into a text context for the LLM.
        context = f"""
Audio Analysis Summary:
- File: {file_info.get('filename', 'Unknown')}
- Duration: {file_info.get('duration', 0):.2f} seconds
- LLM Summary: {request.analysis_data.get('llm_summary', 'No summary available')}
Speaker Diarization:
{request.analysis_data.get('diarization_transcription', [])}
Audio Events:
{request.analysis_data.get('audio_events', {}).get('top_events', [])}
Emotion Analysis:
{request.analysis_data.get('emotion_analysis', {})}
Paralinguistic Features:
{request.analysis_data.get('paralinguistic_features', {})}
"""
        # Create chat completion
        response = groq_client.chat.completions.create(
            model="llama-3.1-8b-instant",  # Using smaller model as requested
            messages=[
                {
                    "role": "system",
                    "content": "You are an expert audio analyst. Answer questions about the provided audio analysis data. Be helpful and provide insights based on the analysis results."
                },
                {
                    "role": "user",
                    "content": f"Context: {context}\n\nQuestion: {request.question}"
                }
            ],
            temperature=0.7,
            max_tokens=1000
        )
        # Fix: message.content may be None (e.g. refusals/tool responses);
        # the original's unguarded .strip() would raise AttributeError.
        answer = (response.choices[0].message.content or "").strip()
        return ChatResponse(answer=answer)
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Chat error: {str(e)}")
@app.get("/health")
async def health_check():
    """Health check endpoint: reports liveness and analyzer availability."""
    analyzer_ready = analyzer is not None
    return {"status": "healthy", "analyzer_loaded": analyzer_ready}
if __name__ == "__main__":
    # Bind on all interfaces; 7860 is presumably the Hugging Face Spaces
    # default port — confirm against the deployment config.
    uvicorn.run(app, host="0.0.0.0", port=7860)
|