Upload 2 files
- app.py +21 -4
- requirements.txt +8 -7
app.py
CHANGED
@@ -13,7 +13,8 @@ from fastapi import FastAPI, HTTPException, BackgroundTasks, Header, Depends
 from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
 from pydantic import BaseModel, HttpUrl
 import torch
-import 
+import soundfile as sf
+import scipy.signal
 import tempfile
 import requests
 from pathlib import Path
@@ -23,6 +24,7 @@ import uvicorn
 import asyncio
 from contextlib import asynccontextmanager
 import socket
+import numpy as np
 
 # Global model variable
 model = None
@@ -209,14 +211,29 @@ def classify_audio(file_path: str) -> dict:
     if file_size == 0:
         raise ValueError("Audio file is empty")
 
-    # Load audio
-    print("Loading audio with 
-    audio, sr = 
+    # Load audio with soundfile
+    print("Loading audio with soundfile...")
+    audio, sr = sf.read(file_path)
     print(f"Audio loaded: {len(audio)} samples at {sr}Hz, duration: {len(audio)/sr:.2f}s")
 
     if len(audio) == 0:
         raise ValueError("Audio file contains no audio data")
 
+    # Convert to mono if stereo
+    if audio.ndim > 1:
+        print("Converting stereo to mono...")
+        audio = np.mean(audio, axis=1)
+
+    # Resample to 16kHz if needed (model requirement)
+    target_sr = 16000
+    if sr != target_sr:
+        print(f"Resampling from {sr}Hz to {target_sr}Hz...")
+        # Calculate the number of samples after resampling
+        num_samples = int(len(audio) * target_sr / sr)
+        audio = scipy.signal.resample(audio, num_samples)
+        sr = target_sr
+        print(f"Resampled: {len(audio)} samples at {sr}Hz")
+
     # Convert to tensor and add batch dimension
     print("Converting to tensor...")
     audio_tensor = torch.FloatTensor(audio).unsqueeze(0)
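Taken together, the app.py changes swap the previous loader for a soundfile-based path: read the file, downmix multichannel audio to mono, resample to the model's 16 kHz input rate with scipy, then hand a batched float tensor to the model. A minimal standalone sketch of that preprocessing, assuming only the libraries added in requirements.txt (the helper name load_audio_tensor is illustrative, not part of the app):

import numpy as np
import scipy.signal
import soundfile as sf
import torch

def load_audio_tensor(file_path: str, target_sr: int = 16000) -> torch.Tensor:
    # Read samples at the file's native rate (soundfile returns a float numpy array)
    audio, sr = sf.read(file_path)
    # Downmix stereo/multichannel recordings to mono by averaging channels
    if audio.ndim > 1:
        audio = np.mean(audio, axis=1)
    # Resample to the model's expected rate only when the rates differ
    if sr != target_sr:
        num_samples = int(len(audio) * target_sr / sr)
        audio = scipy.signal.resample(audio, num_samples)
    # Add a batch dimension so the result is shaped (1, num_samples)
    return torch.FloatTensor(audio).unsqueeze(0)

Note that scipy.signal.resample is FFT-based; for long recordings or signals with sharp onsets, scipy.signal.resample_poly is a common alternative.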
requirements.txt
CHANGED
@@ -1,8 +1,9 @@
-fastapi==0.104.1
-uvicorn==0.24.0
-streamlit>=1.28.0
-torch>=2.0.0
-
-
-
+fastapi==0.104.1
+uvicorn==0.24.0
+streamlit>=1.28.0
+torch>=2.0.0
+soundfile>=0.12.1
+scipy>=1.9.0
+requests>=2.25.0
+pydantic>=2.0.0
 git+https://github.com/awsaf49/sonics.git
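The updated requirements pin the new audio stack (soundfile, scipy) alongside the existing FastAPI/torch dependencies. For a local run the usual install should be enough; the command below is generic rather than specific to this Space, and recent soundfile wheels normally bundle libsndfile so no extra system package is needed:

pip install -r requirements.txt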