walker11 committed on
Commit
4197dc6
·
verified ·
1 Parent(s): 2abb745

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +39 -3
  2. requirements.txt +1 -0
app.py CHANGED
@@ -8,6 +8,7 @@ from fastapi.middleware.cors import CORSMiddleware
8
  from fastapi.staticfiles import StaticFiles
9
  import uvicorn
10
  from pathlib import Path
 
11
 
12
  # Create FastAPI app
13
  app = FastAPI(title="Speech to Text Model")
@@ -28,8 +29,9 @@ recognizer = sr.Recognizer()
28
  @app.post("/generate-story")
29
  async def generate_story_api(file: UploadFile = File(...)):
30
  try:
31
- # Save uploaded audio to a temp file
32
- with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
 
33
  tmp.write(await file.read())
34
  tmp_path = tmp.name
35
 
@@ -49,14 +51,48 @@ async def generate_story_api(file: UploadFile = File(...)):
49
  content={"error": str(e)}
50
  )
51
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52
  # Function for processing audio (used by both FastAPI and Gradio)
53
  def transcribe_audio(audio_path):
54
  try:
 
 
 
55
  # Use speech_recognition to transcribe
56
- with sr.AudioFile(audio_path) as source:
57
  audio_data = recognizer.record(source)
58
  # Try to use Google's speech recognition for Arabic
59
  text = recognizer.recognize_google(audio_data, language="ar-AR")
 
 
 
 
 
60
  return text
61
  except sr.UnknownValueError:
62
  return "لم يتم التعرف على الكلام"
 
8
  from fastapi.staticfiles import StaticFiles
9
  import uvicorn
10
  from pathlib import Path
11
+ from pydub import AudioSegment
12
 
13
  # Create FastAPI app
14
  app = FastAPI(title="Speech to Text Model")
 
29
  @app.post("/generate-story")
30
  async def generate_story_api(file: UploadFile = File(...)):
31
  try:
32
+ # Save uploaded audio to a temp file with original extension
33
+ file_extension = os.path.splitext(file.filename)[1] if file.filename else ".wav"
34
+ with tempfile.NamedTemporaryFile(delete=False, suffix=file_extension) as tmp:
35
  tmp.write(await file.read())
36
  tmp_path = tmp.name
37
 
 
51
  content={"error": str(e)}
52
  )
53
 
54
+ # Convert any audio format to WAV
55
def convert_to_wav(audio_path):
    """Return a path to a WAV version of ``audio_path``.

    A path whose extension is already ``.wav`` (compared case-insensitively)
    is returned unchanged and nothing is read or written. Any other file is
    decoded with ``AudioSegment.from_file`` and exported next to the input as
    ``<stem>_converted.wav``.

    Args:
        audio_path: Filesystem path of the audio file to convert.

    Returns:
        ``audio_path`` itself when it already ends in ``.wav``; otherwise the
        path of the newly written ``*_converted.wav`` file. In the second
        case the caller is responsible for deleting the converted file.

    Raises:
        Exception: If any step fails. The message is prefixed with
            ``"Error converting audio format: "`` and the underlying error
            is attached as ``__cause__``.
    """
    try:
        stem, file_extension = os.path.splitext(audio_path)

        # Already WAV: hand the original path back without converting.
        if file_extension.lower() == ".wav":
            return audio_path

        wav_path = stem + "_converted.wav"

        # Known and unknown extensions were previously handled by two
        # identical branches; a single conversion path covers both.
        audio = AudioSegment.from_file(audio_path)
        audio.export(wav_path, format="wav")
        return wav_path
    except Exception as e:
        # Keep the generic exception type and message callers already see,
        # but chain the cause so the real failure stays in the traceback.
        raise Exception(f"Error converting audio format: {str(e)}") from e
79
+
80
  # Function for processing audio (used by both FastAPI and Gradio)
81
  def transcribe_audio(audio_path):
82
  try:
83
+ # Convert audio to WAV format first
84
+ wav_path = convert_to_wav(audio_path)
85
+
86
  # Use speech_recognition to transcribe
87
+ with sr.AudioFile(wav_path) as source:
88
  audio_data = recognizer.record(source)
89
  # Try to use Google's speech recognition for Arabic
90
  text = recognizer.recognize_google(audio_data, language="ar-AR")
91
+
92
+ # Clean up converted file if it's different from the original
93
+ if wav_path != audio_path and os.path.exists(wav_path):
94
+ os.remove(wav_path)
95
+
96
  return text
97
  except sr.UnknownValueError:
98
  return "لم يتم التعرف على الكلام"
requirements.txt CHANGED
@@ -4,3 +4,4 @@ fastapi==0.103.1
4
  uvicorn==0.23.2
5
  python-multipart==0.0.6
6
  pydub==0.25.1
 
 
4
  uvicorn==0.23.2
5
  python-multipart==0.0.6
6
  pydub==0.25.1
7
+ ffmpeg-python==0.2.0