import os
import tempfile
import asyncio
from fastapi import FastAPI, File, UploadFile, HTTPException, Depends
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse
from pydantic import BaseModel
from typing import Optional, Dict, Any
import uvicorn
from groq import Groq
from dotenv import load_dotenv
import librosa
import soundfile as sf
from main2 import UnifiedAudioAnalyzer, summarize_audio_analysis_with_llm

# Load environment variables
load_dotenv()

app = FastAPI(title="Audio Analysis API", version="1.0.0")

# CORS middleware
app.add_middleware(
    CORSMiddleware,
    allow_origins=["http://localhost:9002", "http://localhost:3000"],  # Frontend URLs
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Initialize the audio analyzer
analyzer = UnifiedAudioAnalyzer(enable_parallel_processing=True)

# Groq client for chat
groq_client = None
try:
    groq_api_key = os.getenv("GROQ_API_KEY")
    if groq_api_key:
        groq_client = Groq(api_key=groq_api_key)
except Exception as e:
    print(f"Warning: Could not initialize Groq client: {e}")

# Pydantic models
class ChatRequest(BaseModel):
    question: str
    analysis_data: Dict[str, Any]

class ChatResponse(BaseModel):
    answer: str

class AnalysisResponse(BaseModel):
    success: bool
    data: Optional[Dict[str, Any]] = None
    error: Optional[str] = None

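# Note: the "data" object returned by /upload is intended to be passed back
# verbatim as ChatRequest.analysis_data when the frontend calls /chat.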
def convert_audio_to_wav(audio_file_path: str) -> str:
    """Convert audio file to WAV format if needed"""
    try:
        # Load with librosa (handles many formats) and resample to 16 kHz mono
        audio_data, sample_rate = librosa.load(audio_file_path, sr=16000)
        
        # Create temporary WAV file
        temp_wav = tempfile.NamedTemporaryFile(suffix='.wav', delete=False)
        temp_wav_path = temp_wav.name
        temp_wav.close()
        
        # Save as WAV
        sf.write(temp_wav_path, audio_data, sample_rate)
        
        return temp_wav_path
    except Exception as e:
        raise HTTPException(status_code=400, detail=f"Error converting audio to WAV: {str(e)}")

@app.get("/")
async def root():
    return {"message": "Audio Analysis API is running"}

@app.post("/upload", response_model=AnalysisResponse)
async def upload_audio(file: UploadFile = File(...)):
    """Upload and analyze audio file"""
    try:
        # Check file type
        if not file.content_type.startswith("audio/"):
            raise HTTPException(status_code=400, detail="File must be an audio file")
        
        # Create a temporary copy, keeping the original extension so the decoder can identify the format
        suffix = os.path.splitext(file.filename or "")[1] or ".tmp"
        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_file:
            content = await file.read()
            temp_file.write(content)
            temp_file_path = temp_file.name
        
        try:
            # Convert to WAV if needed
            wav_file_path = convert_audio_to_wav(temp_file_path)
            
            # Perform analysis
            analysis_results = analyzer.analyze_complete_audio(wav_file_path)
            
            if not analysis_results:
                raise HTTPException(status_code=500, detail="Analysis failed")
            
            # Generate LLM summary
            try:
                summary = summarize_audio_analysis_with_llm(analysis_results)
                analysis_results['llm_summary'] = summary
            except Exception as e:
                print(f"Warning: LLM summary failed: {e}")
                analysis_results['llm_summary'] = "Summary generation failed"
            
            return AnalysisResponse(
                success=True,
                data=analysis_results
            )
            
        finally:
            # Clean up temporary files
            try:
                os.unlink(temp_file_path)
                if 'wav_file_path' in locals():
                    os.unlink(wav_file_path)
            except OSError:
                pass
                
    except HTTPException:
        # Preserve the intended status code instead of collapsing it into a 200 response
        raise
    except Exception as e:
        return AnalysisResponse(
            success=False,
            error=str(e)
        )

@app.post("/chat", response_model=ChatResponse)
async def chat_with_analysis(request: ChatRequest):
    """Chat with AI about the analysis results"""
    if not groq_client:
        raise HTTPException(status_code=500, detail="Groq API not configured")
    
    try:
        # Prepare context from analysis data
        context = f"""
        Audio Analysis Summary:
        - File: {request.analysis_data.get('file_info', {}).get('filename', 'Unknown')}
        - Duration: {request.analysis_data.get('file_info', {}).get('duration', 0):.2f} seconds
        - LLM Summary: {request.analysis_data.get('llm_summary', 'No summary available')}
        
        Speaker Diarization:
        {request.analysis_data.get('diarization_transcription', [])}
        
        Audio Events:
        {request.analysis_data.get('audio_events', {}).get('top_events', [])}
        
        Emotion Analysis:
        {request.analysis_data.get('emotion_analysis', {})}
        
        Paralinguistic Features:
        {request.analysis_data.get('paralinguistic_features', {})}
        """
        
        # Create chat completion
        response = groq_client.chat.completions.create(
            model="llama-3.1-8b-instant",  # Using smaller model as requested
            messages=[
                {
                    "role": "system",
                    "content": "You are an expert audio analyst. Answer questions about the provided audio analysis data. Be helpful and provide insights based on the analysis results."
                },
                {
                    "role": "user",
                    "content": f"Context: {context}\n\nQuestion: {request.question}"
                }
            ],
            temperature=0.7,
            max_tokens=1000
        )
        
        answer = response.choices[0].message.content.strip()
        
        return ChatResponse(answer=answer)
        
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Chat error: {str(e)}")

@app.get("/health")
async def health_check():
    """Health check endpoint"""
    return {"status": "healthy", "analyzer_loaded": analyzer is not None}

if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=7860)
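# Example client usage (a sketch, not part of the API itself): assumes the
# server is running locally on port 7860, the third-party `requests` package is
# installed, and "sample.wav" is a placeholder file name.
#
#   import requests
#
#   base = "http://localhost:7860"
#   print(requests.get(f"{base}/health").json())
#
#   with open("sample.wav", "rb") as f:
#       # The MIME type must start with "audio/" to pass the upload check above.
#       upload = requests.post(
#           f"{base}/upload",
#           files={"file": ("sample.wav", f, "audio/wav")},
#       ).json()
#
#   if upload["success"]:
#       chat = requests.post(
#           f"{base}/chat",
#           json={"question": "Which emotions were detected?",
#                 "analysis_data": upload["data"]},
#       ).json()
#       print(chat["answer"])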