juzer09 committed on
Commit
6af9be3
·
verified ·
1 Parent(s): 2446566

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +21 -4
  2. requirements.txt +8 -7
app.py CHANGED
@@ -13,7 +13,8 @@ from fastapi import FastAPI, HTTPException, BackgroundTasks, Header, Depends
13
  from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
14
  from pydantic import BaseModel, HttpUrl
15
  import torch
16
- import librosa
 
17
  import tempfile
18
  import requests
19
  from pathlib import Path
@@ -23,6 +24,7 @@ import uvicorn
23
  import asyncio
24
  from contextlib import asynccontextmanager
25
  import socket
 
26
 
27
  # Global model variable
28
  model = None
@@ -209,14 +211,29 @@ def classify_audio(file_path: str) -> dict:
209
  if file_size == 0:
210
  raise ValueError("Audio file is empty")
211
 
212
- # Load audio (model uses 16kHz sample rate)
213
- print("๐Ÿ”Š Loading audio with librosa...")
214
- audio, sr = librosa.load(file_path, sr=16000)
215
  print(f"๐ŸŽผ Audio loaded: {len(audio)} samples at {sr}Hz, duration: {len(audio)/sr:.2f}s")
216
 
217
  if len(audio) == 0:
218
  raise ValueError("Audio file contains no audio data")
219
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
220
  # Convert to tensor and add batch dimension
221
  print("๐Ÿงฎ Converting to tensor...")
222
  audio_tensor = torch.FloatTensor(audio).unsqueeze(0)
 
13
  from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
14
  from pydantic import BaseModel, HttpUrl
15
  import torch
16
+ import soundfile as sf
17
+ import scipy.signal
18
  import tempfile
19
  import requests
20
  from pathlib import Path
 
24
  import asyncio
25
  from contextlib import asynccontextmanager
26
  import socket
27
+ import numpy as np
28
 
29
  # Global model variable
30
  model = None
 
211
  if file_size == 0:
212
  raise ValueError("Audio file is empty")
213
 
214
+ # Load audio with soundfile
215
+ print("๐Ÿ”Š Loading audio with soundfile...")
216
+ audio, sr = sf.read(file_path)
217
  print(f"๐ŸŽผ Audio loaded: {len(audio)} samples at {sr}Hz, duration: {len(audio)/sr:.2f}s")
218
 
219
  if len(audio) == 0:
220
  raise ValueError("Audio file contains no audio data")
221
 
222
+ # Convert to mono if stereo
223
+ if audio.ndim > 1:
224
+ print("๐Ÿ”€ Converting stereo to mono...")
225
+ audio = np.mean(audio, axis=1)
226
+
227
+ # Resample to 16kHz if needed (model requirement)
228
+ target_sr = 16000
229
+ if sr != target_sr:
230
+ print(f"๐Ÿ”„ Resampling from {sr}Hz to {target_sr}Hz...")
231
+ # Calculate the number of samples after resampling
232
+ num_samples = int(len(audio) * target_sr / sr)
233
+ audio = scipy.signal.resample(audio, num_samples)
234
+ sr = target_sr
235
+ print(f"โœ… Resampled: {len(audio)} samples at {sr}Hz")
236
+
237
  # Convert to tensor and add batch dimension
238
  print("๐Ÿงฎ Converting to tensor...")
239
  audio_tensor = torch.FloatTensor(audio).unsqueeze(0)
requirements.txt CHANGED
@@ -1,8 +1,9 @@
1
- fastapi==0.104.1
2
- uvicorn==0.24.0
3
- streamlit>=1.28.0
4
- torch>=2.0.0
5
- librosa>=0.9.0
6
- requests>=2.25.0
7
- pydantic>=2.0.0
 
8
  git+https://github.com/awsaf49/sonics.git
 
1
+ fastapi==0.104.1
2
+ uvicorn==0.24.0
3
+ streamlit>=1.28.0
4
+ torch>=2.0.0
5
+ soundfile>=0.12.1
6
+ scipy>=1.9.0
7
+ requests>=2.25.0
8
+ pydantic>=2.0.0
9
  git+https://github.com/awsaf49/sonics.git