Spaces:

GetmanY1
/

finnish_asr

Sleeping

GetmanY1 committed on Apr 2

Commit

99ef49e

1 Parent(s): 78b55cc

Initial commit

Files changed (3) hide show

README.md CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
-title: Finnish Asr
 emoji: 🏆
-colorFrom: green
 colorTo: indigo
 sdk: gradio
 sdk_version: 5.23.3

 ---
+title: Finnish ASR
 emoji: 🏆
+colorFrom: red
 colorTo: indigo
 sdk: gradio
 sdk_version: 5.23.3

app.py ADDED Viewed

+import gradio as gr
+from transformers import pipeline
+from librosa import resample
+import numpy as np
+# Most recently used pipeline, keyed by model id. Only one entry is retained
+# so that switching models does not accumulate several checkpoints in memory.
+_PIPELINE_CACHE = {}
+
+
+def _get_pipeline(model_id):
+    """Return the ASR pipeline for *model_id*, reusing the last one loaded."""
+    if model_id not in _PIPELINE_CACHE:
+        # Drop the previous model before loading the new one.
+        _PIPELINE_CACHE.clear()
+        _PIPELINE_CACHE[model_id] = pipeline(
+            "automatic-speech-recognition",
+            model=model_id,
+            device="cpu",
+        )
+    return _PIPELINE_CACHE[model_id]
+
+
+def transcribe(input_audio, model_id):
+    """Transcribe an audio clip with the selected speech-recognition model.
+
+    Args:
+        input_audio: ``(sample_rate, samples)`` pair, or ``None`` when the
+            request was submitted without audio. ``samples`` is a NumPy
+            array; arrays with more than one dimension are averaged over
+            axis 1 to obtain a mono signal.
+        model_id: Hub identifier of the checkpoint to run.
+
+    Returns:
+        The ``'text'`` field of the pipeline output.
+
+    Raises:
+        gr.Error: If no audio was provided, the audio is empty, or no model
+            was selected.
+    """
+    if input_audio is None:
+        raise gr.Error("Please upload or record an audio clip first.")
+    if not model_id:
+        raise gr.Error("Please choose a model from the list.")
+
+    sr, speech = input_audio
+    if speech.size == 0:
+        raise gr.Error("The provided audio clip is empty.")
+
+    # Convert to mono if stereo
+    if speech.ndim > 1:
+        speech = speech.mean(axis=1)
+    # Convert to float32 if needed
+    if speech.dtype != np.float32:
+        speech = speech.astype(np.float32)
+    # Resample if sampling rate is not 16kHz
+    if sr != 16000:
+        speech = resample(speech, orig_sr=sr, target_sr=16000)
+
+    pipe = _get_pipeline(model_id)
+    # Long recordings are processed in 30 s windows with 5 s of stride.
+    return pipe(speech, chunk_length_s=30, stride_length_s=5)["text"]
+# Checkpoints offered in the model dropdown.
+model_ids_list = [
+    "GetmanY1/wav2vec2-base-fi-150k-finetuned",
+    "GetmanY1/wav2vec2-large-fi-150k-finetuned",
+    "GetmanY1/wav2vec2-xlarge-fi-150k-finetuned",
+]
+
+# Web UI: an audio input (file upload or microphone) plus a model selector,
+# wired to `transcribe`, with the result shown as plain text.
+gradio_app = gr.Interface(
+    fn=transcribe,
+    inputs=[
+        gr.Audio(sources=["upload", "microphone"]),
+        gr.Dropdown(model_ids_list),
+    ],
+    outputs="text",
+    # The comma after `title` was missing, which made the file a SyntaxError.
+    title="Finnish Automatic Speech Recognition",
+    description="Choose a model from the list. Select the Base model for the fastest inference and the XLarge one for the most accurate results.",
+)
+
+if __name__ == "__main__":
+    gradio_app.launch()

requirements.txt ADDED Viewed

+transformers
+torch
+librosa
+samplerate
+resampy