# server.py — FastAPI backend for the Audio Analysis (ALM) service.
import os
import tempfile
import asyncio
from fastapi import FastAPI, File, UploadFile, HTTPException, Depends
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse
from pydantic import BaseModel
from typing import Optional, Dict, Any
import uvicorn
from groq import Groq
from dotenv import load_dotenv
import librosa
import soundfile as sf
from main2 import UnifiedAudioAnalyzer, summarize_audio_analysis_with_llm
# Load environment variables from a local .env file (e.g. GROQ_API_KEY)
load_dotenv()

app = FastAPI(title="Audio Analysis API", version="1.0.0")

# CORS middleware: allow the local frontend dev servers to call this API
app.add_middleware(
    CORSMiddleware,
    allow_origins=["http://localhost:9002", "http://localhost:3000"],  # Frontend URLs
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Initialize the audio analyzer (single shared, module-level instance)
analyzer = UnifiedAudioAnalyzer(enable_parallel_processing=True)

# Groq client used by the /chat endpoint. Stays None when GROQ_API_KEY is
# absent or client construction fails; /chat then responds with an error.
groq_client = None
try:
    groq_api_key = os.getenv("GROQ_API_KEY")
    if groq_api_key:
        groq_client = Groq(api_key=groq_api_key)
except Exception as e:
    print(f"Warning: Could not initialize Groq client: {e}")
# Pydantic models
class ChatRequest(BaseModel):
    """Request body for /chat: a user question plus the analysis it refers to."""
    question: str  # Natural-language question about the analysis
    analysis_data: Dict[str, Any]  # Analysis payload previously returned by /upload
class ChatResponse(BaseModel):
    """Response body for /chat."""
    answer: str  # The chat model's answer text
class AnalysisResponse(BaseModel):
    """Envelope returned by /upload: either analysis data or an error message."""
    success: bool
    data: Optional[Dict[str, Any]] = None  # Analysis results when success is True
    error: Optional[str] = None  # Error description when success is False
def convert_audio_to_wav(audio_file_path: str) -> str:
    """Convert an audio file to a temporary 16 kHz WAV file.

    Uses librosa to decode the input (supports many container formats) and
    writes the resampled samples to a fresh temporary .wav file.

    Args:
        audio_file_path: Path to the source audio file.

    Returns:
        Path to the newly created temporary WAV file. The caller is
        responsible for deleting it.

    Raises:
        HTTPException: 400 when the file cannot be decoded or written.
    """
    temp_wav_path = None
    try:
        # librosa resamples to 16 kHz here; downstream analysis presumably
        # expects that rate (set by the original author).
        audio_data, sample_rate = librosa.load(audio_file_path, sr=16000)

        # Create the destination file first, then write samples into it.
        temp_wav = tempfile.NamedTemporaryFile(suffix='.wav', delete=False)
        temp_wav_path = temp_wav.name
        temp_wav.close()

        sf.write(temp_wav_path, audio_data, sample_rate)
        return temp_wav_path
    except Exception as e:
        # Bug fix: don't leak the temp file when sf.write (or anything after
        # file creation) fails — the original raised without cleaning up.
        if temp_wav_path is not None:
            try:
                os.unlink(temp_wav_path)
            except OSError:
                pass
        raise HTTPException(status_code=400, detail=f"Error converting audio to WAV: {str(e)}")
@app.get("/")
async def root():
    """Liveness message for the API root."""
    payload = {"message": "Audio Analysis API is running"}
    return payload
@app.post("/upload", response_model=AnalysisResponse)
async def upload_audio(file: UploadFile = File(...)):
    """Upload an audio file, run the full analysis pipeline, and return results.

    The upload is spooled to a temporary file, converted to 16 kHz WAV,
    analyzed, and (best-effort) summarized by the LLM. Temporary files are
    always cleaned up.

    Raises:
        HTTPException: 400 for non-audio uploads, 500 when analysis fails.
    """
    # Reject uploads that do not declare an audio content type.
    # Bug fix: content_type can be None, so guard before startswith.
    if not (file.content_type or "").startswith("audio/"):
        raise HTTPException(status_code=400, detail="File must be an audio file")

    # Preserve the original extension so downstream decoders can sniff it.
    # Bug fix: filename can be None; fall back to a neutral suffix.
    suffix = os.path.splitext(file.filename or "")[1] or ".bin"
    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_file:
        content = await file.read()
        temp_file.write(content)
        temp_file_path = temp_file.name

    wav_file_path = None
    try:
        # Bug fix: conversion and analysis are blocking (decode + heavy
        # compute); run them in a worker thread so the event loop stays
        # responsive instead of calling them directly in this async def.
        wav_file_path = await asyncio.to_thread(convert_audio_to_wav, temp_file_path)
        analysis_results = await asyncio.to_thread(analyzer.analyze_complete_audio, wav_file_path)
        if not analysis_results:
            raise HTTPException(status_code=500, detail="Analysis failed")

        # The LLM summary is best-effort: results remain useful without it,
        # so failures are reported inline rather than raised.
        try:
            summary = summarize_audio_analysis_with_llm(analysis_results)
            analysis_results['llm_summary'] = summary
        except Exception as e:
            print(f"Warning: LLM summary failed: {e}")
            analysis_results['llm_summary'] = "Summary generation failed"

        return AnalysisResponse(success=True, data=analysis_results)
    except HTTPException:
        # Bug fix: the original's broad `except Exception` swallowed these
        # and returned HTTP 200 with success=False, masking the intended
        # 400/500 status codes. Let FastAPI report them properly.
        raise
    except Exception as e:
        return AnalysisResponse(success=False, error=str(e))
    finally:
        # Always remove the temporary upload and the converted WAV.
        for path in (temp_file_path, wav_file_path):
            if path:
                try:
                    os.unlink(path)
                except OSError:
                    pass
@app.post("/chat", response_model=ChatResponse)
async def chat_with_analysis(request: ChatRequest):
    """Answer a user question about previously returned analysis results.

    Builds a textual context from the analysis payload and asks the Groq
    chat model to answer the question against it.

    Raises:
        HTTPException: 500 when Groq is not configured or the chat call fails.
    """
    if not groq_client:
        raise HTTPException(status_code=500, detail="Groq API not configured")
    try:
        # Flatten the analysis payload into a prompt context; missing keys
        # degrade gracefully to empty/default values.
        file_info = request.analysis_data.get('file_info', {})
        context = f"""
Audio Analysis Summary:
- File: {file_info.get('filename', 'Unknown')}
- Duration: {file_info.get('duration', 0):.2f} seconds
- LLM Summary: {request.analysis_data.get('llm_summary', 'No summary available')}
Speaker Diarization:
{request.analysis_data.get('diarization_transcription', [])}
Audio Events:
{request.analysis_data.get('audio_events', {}).get('top_events', [])}
Emotion Analysis:
{request.analysis_data.get('emotion_analysis', {})}
Paralinguistic Features:
{request.analysis_data.get('paralinguistic_features', {})}
"""
        # Bug fix: the Groq SDK call is synchronous; run it in a worker
        # thread so this async endpoint does not block the event loop.
        response = await asyncio.to_thread(
            groq_client.chat.completions.create,
            model="llama-3.1-8b-instant",  # Using smaller model as requested
            messages=[
                {
                    "role": "system",
                    "content": "You are an expert audio analyst. Answer questions about the provided audio analysis data. Be helpful and provide insights based on the analysis results."
                },
                {
                    "role": "user",
                    "content": f"Context: {context}\n\nQuestion: {request.question}"
                }
            ],
            temperature=0.7,
            max_tokens=1000
        )
        # Bug fix: message.content can be None on some completions; fall
        # back to an empty string instead of crashing on .strip().
        answer = (response.choices[0].message.content or "").strip()
        return ChatResponse(answer=answer)
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Chat error: {str(e)}")
@app.get("/health")
async def health_check():
    """Report service liveness and whether the analyzer was constructed."""
    status = {
        "status": "healthy",
        "analyzer_loaded": analyzer is not None,
    }
    return status
# Entry point for running the development server directly: `python server.py`
if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=7860)