import os
import tempfile
from pathlib import Path

import gradio as gr
import speech_recognition as sr
import uvicorn
from fastapi import FastAPI, File, UploadFile
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse
from fastapi.staticfiles import StaticFiles
from pydub import AudioSegment

app = FastAPI(title="Speech to Text Model")

# Allow cross-origin requests so a separately served frontend can call the API.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

recognizer = sr.Recognizer()

@app.post("/generate-story")
async def generate_story_api(file: UploadFile = File(...)):
    """Accept an uploaded audio file and return its Arabic transcript as JSON."""
    try:
        # Persist the upload to a temporary file so pydub/speech_recognition can read it from disk.
        file_extension = os.path.splitext(file.filename)[1] if file.filename else ".wav"
        with tempfile.NamedTemporaryFile(delete=False, suffix=file_extension) as tmp:
            tmp.write(await file.read())
            tmp_path = tmp.name

        transcript = transcribe_audio(tmp_path)

        os.remove(tmp_path)

        return JSONResponse({
            "transcript": transcript
        })
    except Exception as e:
        return JSONResponse(
            status_code=500,
            content={"error": str(e)}
        )

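# Example request once the server is running (it listens on port 7860; see the
# __main__ block at the bottom of this file). "recording.wav" is a placeholder filename:
#   curl -X POST http://localhost:7860/generate-story -F "file=@recording.wav"
# Expected response: {"transcript": "..."}
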

def convert_to_wav(audio_path):
    """Convert an audio file to WAV; return the original path if it is already WAV."""
    try:
        file_extension = os.path.splitext(audio_path)[1].lower()

        if file_extension == ".wav":
            return audio_path

        wav_path = os.path.splitext(audio_path)[0] + "_converted.wav"

        # pydub decodes MP3, M4A, OGG, FLAC, AAC and other formats via ffmpeg,
        # so every non-WAV input goes through the same conversion path.
        audio = AudioSegment.from_file(audio_path)
        audio.export(wav_path, format="wav")
        return wav_path
    except Exception as e:
        raise Exception(f"Error converting audio format: {str(e)}")

def transcribe_audio(audio_path):
    """Transcribe an Arabic audio file using Google's free Web Speech API."""
    try:
        wav_path = convert_to_wav(audio_path)

        with sr.AudioFile(wav_path) as source:
            audio_data = recognizer.record(source)

        # "ar-SA" is one of the Arabic locale codes supported by Google's recognizer.
        text = recognizer.recognize_google(audio_data, language="ar-SA")

        # Remove the intermediate WAV file if a conversion was performed.
        if wav_path != audio_path and os.path.exists(wav_path):
            os.remove(wav_path)

        return text
    except sr.UnknownValueError:
        return "لم يتم التعرف على الكلام"  # "Speech could not be recognized"
    except sr.RequestError as e:
        return f"حدث خطأ في خدمة التعرف على الصوت: {e}"  # "Speech recognition service error: ..."
    except Exception as e:
        return f"حدث خطأ: {str(e)}"  # "An error occurred: ..."

def gradio_process(audio_file):
    """Gradio callback: take a recorded/uploaded audio file and return its transcript."""
    try:
        # gr.Audio(type="filepath") passes a path string; fall back to .name for file-like objects.
        audio_path = audio_file if isinstance(audio_file, str) else audio_file.name

        transcript = transcribe_audio(audio_path)

        return transcript
    except Exception as e:
        return f"حدث خطأ: {str(e)}"  # "An error occurred: ..."

with gr.Blocks(title="Speech to Text Model") as demo:
    gr.Markdown("# Speech to Text")
    # "Record or upload an Arabic audio file and the system will convert it to text."
    gr.Markdown("قم بتسجيل أو تحميل ملف صوتي باللغة العربية وسيقوم النظام بتحويله إلى نص.")

    with gr.Row():
        audio_input = gr.Audio(label="تسجيل أو تحميل صوت", type="filepath")  # "Record or upload audio"

    with gr.Row():
        submit_btn = gr.Button("تحويل إلى نص")  # "Convert to text"

    with gr.Row():
        transcript_output = gr.Textbox(label="النص المستخرج من التسجيل الصوتي")  # "Text extracted from the recording"

    submit_btn.click(
        fn=gradio_process,
        inputs=audio_input,
        outputs=transcript_output,
    )
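
# The Gradio UI above is never launched elsewhere in this file; assuming it is meant to be
# served from the same process as the API, it can be mounted on the FastAPI app. It must be
# mounted before the "/" static mount below, which would otherwise shadow the /gradio path.
app = gr.mount_gradio_app(app, demo, path="/gradio")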

# Serve the static frontend (if present) at the root path.
frontend_path = Path("../front")
if frontend_path.exists():
    app.mount("/", StaticFiles(directory=str(frontend_path), html=True), name="frontend")


if __name__ == "__main__":
    uvicorn.run("app:app", host="0.0.0.0", port=7860, reload=True)