Upload 2 files
Browse files
- app.py +39 -3
- requirements.txt +1 -0
app.py
CHANGED
@@ -8,6 +8,7 @@ from fastapi.middleware.cors import CORSMiddleware
|
|
8 |
from fastapi.staticfiles import StaticFiles
|
9 |
import uvicorn
|
10 |
from pathlib import Path
|
|
|
11 |
|
12 |
# Create FastAPI app
|
13 |
app = FastAPI(title="Speech to Text Model")
|
@@ -28,8 +29,9 @@ recognizer = sr.Recognizer()
|
|
28 |
@app.post("/generate-story")
|
29 |
async def generate_story_api(file: UploadFile = File(...)):
|
30 |
try:
|
31 |
-
# Save uploaded audio to a temp file
|
32 |
-
|
|
|
33 |
tmp.write(await file.read())
|
34 |
tmp_path = tmp.name
|
35 |
|
@@ -49,14 +51,48 @@ async def generate_story_api(file: UploadFile = File(...)):
|
|
49 |
content={"error": str(e)}
|
50 |
)
|
51 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
52 |
# Function for processing audio (used by both FastAPI and Gradio)
|
53 |
def transcribe_audio(audio_path):
|
54 |
try:
|
|
|
|
|
|
|
55 |
# Use speech_recognition to transcribe
|
56 |
-
with sr.AudioFile(audio_path) as source:
|
57 |
audio_data = recognizer.record(source)
|
58 |
# Try to use Google's speech recognition for Arabic
|
59 |
text = recognizer.recognize_google(audio_data, language="ar-AR")
|
|
|
|
|
|
|
|
|
|
|
60 |
return text
|
61 |
except sr.UnknownValueError:
|
62 |
return "لم يتم التعرف على الكلام"
|
|
|
8 |
from fastapi.staticfiles import StaticFiles
|
9 |
import uvicorn
|
10 |
from pathlib import Path
|
11 |
+
from pydub import AudioSegment
|
12 |
|
13 |
# Create FastAPI app
|
14 |
app = FastAPI(title="Speech to Text Model")
|
|
|
29 |
@app.post("/generate-story")
|
30 |
async def generate_story_api(file: UploadFile = File(...)):
|
31 |
try:
|
32 |
+
# Save uploaded audio to a temp file with original extension
|
33 |
+
file_extension = os.path.splitext(file.filename)[1] if file.filename else ".wav"
|
34 |
+
with tempfile.NamedTemporaryFile(delete=False, suffix=file_extension) as tmp:
|
35 |
tmp.write(await file.read())
|
36 |
tmp_path = tmp.name
|
37 |
|
|
|
51 |
content={"error": str(e)}
|
52 |
)
|
53 |
|
54 |
+
# Convert any audio format to WAV
|
55 |
+
def convert_to_wav(audio_path):
|
56 |
+
try:
|
57 |
+
# Get the file extension
|
58 |
+
file_extension = os.path.splitext(audio_path)[1].lower()
|
59 |
+
|
60 |
+
# If already WAV, don't convert
|
61 |
+
if file_extension == ".wav":
|
62 |
+
return audio_path
|
63 |
+
|
64 |
+
# Create a new temporary WAV file
|
65 |
+
wav_path = os.path.splitext(audio_path)[0] + "_converted.wav"
|
66 |
+
|
67 |
+
# Convert based on file extension
|
68 |
+
if file_extension in [".mp3", ".m4a", ".ogg", ".flac", ".aac"]:
|
69 |
+
audio = AudioSegment.from_file(audio_path)
|
70 |
+
audio.export(wav_path, format="wav")
|
71 |
+
return wav_path
|
72 |
+
else:
|
73 |
+
# For unknown formats, try a generic approach
|
74 |
+
audio = AudioSegment.from_file(audio_path)
|
75 |
+
audio.export(wav_path, format="wav")
|
76 |
+
return wav_path
|
77 |
+
except Exception as e:
|
78 |
+
raise Exception(f"Error converting audio format: {str(e)}")
|
79 |
+
|
80 |
# Function for processing audio (used by both FastAPI and Gradio)
|
81 |
def transcribe_audio(audio_path):
|
82 |
try:
|
83 |
+
# Convert audio to WAV format first
|
84 |
+
wav_path = convert_to_wav(audio_path)
|
85 |
+
|
86 |
# Use speech_recognition to transcribe
|
87 |
+
with sr.AudioFile(wav_path) as source:
|
88 |
audio_data = recognizer.record(source)
|
89 |
# Try to use Google's speech recognition for Arabic
|
90 |
text = recognizer.recognize_google(audio_data, language="ar-AR")
|
91 |
+
|
92 |
+
# Clean up converted file if it's different from the original
|
93 |
+
if wav_path != audio_path and os.path.exists(wav_path):
|
94 |
+
os.remove(wav_path)
|
95 |
+
|
96 |
return text
|
97 |
except sr.UnknownValueError:
|
98 |
return "لم يتم التعرف على الكلام"
|
requirements.txt
CHANGED
@@ -4,3 +4,4 @@ fastapi==0.103.1
|
|
4 |
uvicorn==0.23.2
|
5 |
python-multipart==0.0.6
|
6 |
pydub==0.25.1
|
|
|
|
4 |
uvicorn==0.23.2
|
5 |
python-multipart==0.0.6
|
6 |
pydub==0.25.1
|
7 |
+
ffmpeg-python==0.2.0
|