# server.py — FastAPI backend for the Audio Analysis (ALM) service.
import os
import tempfile
import asyncio
from fastapi import FastAPI, File, UploadFile, HTTPException, Depends
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse
from pydantic import BaseModel
from typing import Optional, Dict, Any
import uvicorn
from groq import Groq
from dotenv import load_dotenv
import librosa
import soundfile as sf
from main2 import UnifiedAudioAnalyzer, summarize_audio_analysis_with_llm
# Load environment variables from a local .env file (e.g. GROQ_API_KEY)
load_dotenv()

app = FastAPI(title="Audio Analysis API", version="1.0.0")

# CORS middleware: allow the local frontend dev servers to call this API
app.add_middleware(
    CORSMiddleware,
    allow_origins=["http://localhost:9002", "http://localhost:3000"],  # Frontend URLs
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Initialize the audio analyzer (single shared, module-level instance)
analyzer = UnifiedAudioAnalyzer(enable_parallel_processing=True)

# Groq client used by the /chat endpoint. Stays None when GROQ_API_KEY is
# absent or client construction fails; /chat then responds with an error.
groq_client = None
try:
    groq_api_key = os.getenv("GROQ_API_KEY")
    if groq_api_key:
        groq_client = Groq(api_key=groq_api_key)
except Exception as e:
    print(f"Warning: Could not initialize Groq client: {e}")
# Pydantic models
class ChatRequest(BaseModel):
    """Request body for /chat: a user question plus the analysis it refers to."""
    question: str  # Natural-language question about the analysis
    analysis_data: Dict[str, Any]  # Analysis payload previously returned by /upload
class ChatResponse(BaseModel):
    """Response body for /chat."""
    answer: str  # The chat model's answer text
class AnalysisResponse(BaseModel):
    """Envelope returned by /upload: either analysis data or an error message."""
    success: bool
    data: Optional[Dict[str, Any]] = None  # Analysis results when success is True
    error: Optional[str] = None  # Error description when success is False
def convert_audio_to_wav(audio_file_path: str) -> str:
    """Convert an audio file to a temporary 16 kHz WAV file.

    Uses librosa to decode the input (supports many container formats) and
    writes the resampled samples to a fresh temporary .wav file.

    Args:
        audio_file_path: Path to the source audio file.

    Returns:
        Path to the newly created temporary WAV file. The caller is
        responsible for deleting it.

    Raises:
        HTTPException: 400 when the file cannot be decoded or written.
    """
    temp_wav_path = None
    try:
        # librosa resamples to 16 kHz here; downstream analysis presumably
        # expects that rate (set by the original author).
        audio_data, sample_rate = librosa.load(audio_file_path, sr=16000)

        # Create the destination file first, then write samples into it.
        temp_wav = tempfile.NamedTemporaryFile(suffix='.wav', delete=False)
        temp_wav_path = temp_wav.name
        temp_wav.close()

        sf.write(temp_wav_path, audio_data, sample_rate)
        return temp_wav_path
    except Exception as e:
        # Bug fix: don't leak the temp file when sf.write (or anything after
        # file creation) fails — the original raised without cleaning up.
        if temp_wav_path is not None:
            try:
                os.unlink(temp_wav_path)
            except OSError:
                pass
        raise HTTPException(status_code=400, detail=f"Error converting audio to WAV: {str(e)}")
@app.get("/")
async def root():
    """Liveness message for the API root."""
    payload = {"message": "Audio Analysis API is running"}
    return payload
@app.post("/upload", response_model=AnalysisResponse)
async def upload_audio(file: UploadFile = File(...)):
    """Upload an audio file, run the full analysis pipeline, and return results.

    The upload is spooled to a temporary file, converted to 16 kHz WAV,
    analyzed, and (best-effort) summarized by the LLM. Temporary files are
    always cleaned up.

    Raises:
        HTTPException: 400 for non-audio uploads, 500 when analysis fails.
    """
    # Reject uploads that do not declare an audio content type.
    # Bug fix: content_type can be None, so guard before startswith.
    if not (file.content_type or "").startswith("audio/"):
        raise HTTPException(status_code=400, detail="File must be an audio file")

    # Preserve the original extension so downstream decoders can sniff it.
    # Bug fix: filename can be None; fall back to a neutral suffix.
    suffix = os.path.splitext(file.filename or "")[1] or ".bin"
    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_file:
        content = await file.read()
        temp_file.write(content)
        temp_file_path = temp_file.name

    wav_file_path = None
    try:
        # Bug fix: conversion and analysis are blocking (decode + heavy
        # compute); run them in a worker thread so the event loop stays
        # responsive instead of calling them directly in this async def.
        wav_file_path = await asyncio.to_thread(convert_audio_to_wav, temp_file_path)
        analysis_results = await asyncio.to_thread(analyzer.analyze_complete_audio, wav_file_path)
        if not analysis_results:
            raise HTTPException(status_code=500, detail="Analysis failed")

        # The LLM summary is best-effort: results remain useful without it,
        # so failures are reported inline rather than raised.
        try:
            summary = summarize_audio_analysis_with_llm(analysis_results)
            analysis_results['llm_summary'] = summary
        except Exception as e:
            print(f"Warning: LLM summary failed: {e}")
            analysis_results['llm_summary'] = "Summary generation failed"

        return AnalysisResponse(success=True, data=analysis_results)
    except HTTPException:
        # Bug fix: the original's broad `except Exception` swallowed these
        # and returned HTTP 200 with success=False, masking the intended
        # 400/500 status codes. Let FastAPI report them properly.
        raise
    except Exception as e:
        return AnalysisResponse(success=False, error=str(e))
    finally:
        # Always remove the temporary upload and the converted WAV.
        for path in (temp_file_path, wav_file_path):
            if path:
                try:
                    os.unlink(path)
                except OSError:
                    pass
@app.post("/chat", response_model=ChatResponse)
async def chat_with_analysis(request: ChatRequest):
    """Answer a user question about previously returned analysis results.

    Builds a textual context from the analysis payload and asks the Groq
    chat model to answer the question against it.

    Raises:
        HTTPException: 500 when Groq is not configured or the chat call fails.
    """
    if not groq_client:
        raise HTTPException(status_code=500, detail="Groq API not configured")
    try:
        # Flatten the analysis payload into a prompt context; missing keys
        # degrade gracefully to empty/default values.
        file_info = request.analysis_data.get('file_info', {})
        context = f"""
Audio Analysis Summary:
- File: {file_info.get('filename', 'Unknown')}
- Duration: {file_info.get('duration', 0):.2f} seconds
- LLM Summary: {request.analysis_data.get('llm_summary', 'No summary available')}
Speaker Diarization:
{request.analysis_data.get('diarization_transcription', [])}
Audio Events:
{request.analysis_data.get('audio_events', {}).get('top_events', [])}
Emotion Analysis:
{request.analysis_data.get('emotion_analysis', {})}
Paralinguistic Features:
{request.analysis_data.get('paralinguistic_features', {})}
"""
        # Bug fix: the Groq SDK call is synchronous; run it in a worker
        # thread so this async endpoint does not block the event loop.
        response = await asyncio.to_thread(
            groq_client.chat.completions.create,
            model="llama-3.1-8b-instant",  # Using smaller model as requested
            messages=[
                {
                    "role": "system",
                    "content": "You are an expert audio analyst. Answer questions about the provided audio analysis data. Be helpful and provide insights based on the analysis results."
                },
                {
                    "role": "user",
                    "content": f"Context: {context}\n\nQuestion: {request.question}"
                }
            ],
            temperature=0.7,
            max_tokens=1000
        )
        # Bug fix: message.content can be None on some completions; fall
        # back to an empty string instead of crashing on .strip().
        answer = (response.choices[0].message.content or "").strip()
        return ChatResponse(answer=answer)
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Chat error: {str(e)}")
@app.get("/health")
async def health_check():
    """Report service liveness and whether the analyzer was constructed."""
    status = {
        "status": "healthy",
        "analyzer_loaded": analyzer is not None,
    }
    return status
# Entry point for running the development server directly: `python server.py`
if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=7860)