Spaces:
Runtime error
Runtime error
| import gradio as gr | |
| import torch | |
| import numpy as np | |
| from transformers import pipeline | |
| from utils.thai_word import ThaiWord | |
| from pythainlp.tokenize import word_tokenize | |
| from collections import deque | |
| from copy import deepcopy | |
# Hugging Face model id of the checkpoint loaded by the ASR pipeline below.
MODEL_NAME = "biodatlab/whisper-th-medium-combined"

# Run on CUDA device index 0 when a GPU is visible, otherwise on the CPU.
if torch.cuda.is_available():
    DEVICE = 0
else:
    DEVICE = "cpu"

# Helper whose ``pretty`` method formats the tokenized transcription.
thw = ThaiWord()

# Long recordings are processed in 30-second chunks. ``stride_length_s``
# (a (left, right) tuple, or one number applied to both sides) is left unset,
# so each side falls back to the default of 1/6th of ``chunk_length_s``.
transcriber = pipeline(
    task="automatic-speech-recognition",
    model=MODEL_NAME,
    chunk_length_s=30,
    device=DEVICE,
)
def transcribe(audio):
    """Transcribe a recorded audio clip to Thai text.

    Args:
        audio: ``(sampling_rate, samples)`` pair where ``samples`` is a numeric
            array, or ``None`` when nothing was recorded.

    Returns:
        A display string. On success it contains the prettified text followed
        by the raw transcription. When there is nothing to transcribe (no
        recording, empty or silent audio, empty transcription) it is a short
        "please try again" prompt. On any failure it is an error message that
        includes the exception text; exceptions are never propagated so the
        UI always has something to show.
    """
    retry_message = 'โปรดลองพูดอีกครั้ง'

    # Nothing was recorded: prompt the user instead of failing on unpacking.
    if audio is None:
        return retry_message

    try:
        sr, y = audio
        y = np.asarray(y).astype(np.float32)

        # The ASR input is a single waveform; mix multi-channel audio down to
        # mono (assumes channels are on the last axis of a 2-D array, i.e.
        # shape (samples, channels) -- TODO confirm against the audio source).
        if y.ndim > 1:
            y = y.mean(axis=1)

        # Empty or all-zero audio has no speech; bail out before normalizing
        # so we never divide by zero and feed NaNs to the model.
        if y.size == 0:
            return retry_message
        peak = float(np.max(np.abs(y)))
        if peak == 0.0:
            return retry_message

        # Peak-normalize to the [-1, 1] range.
        y /= peak

        text = transcriber(
            {"sampling_rate": sr, "raw": y},
            generate_kwargs={"language": "<|th|>", "task": "transcribe"},
            return_timestamps=False,
            batch_size=16,
        )["text"]

        # An empty/whitespace-only transcription means nothing was recognized.
        if not text or not text.strip():
            return retry_message

        tokens = word_tokenize(text, engine="attacut", join_broken_num=True)
        print(tokens)
        # ``deque(tokens)`` is already a fresh container of immutable strings,
        # so ``tokens`` itself is left untouched by ``pretty``.
        return f'pretty: {thw.pretty(deque(tokens))}\n\n original: {text}'
    except Exception as e:
        # UI boundary: report the failure to the user rather than crashing.
        return f'ไม่สามารถแปลงข้อความเสียงได้ โปรดลองอีกครั้ง\n\nพบข้อผิดพลาด: {str(e)}'
# Web UI: record from the microphone, show the transcription as plain text.
demo = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(sources=["microphone"]),
    outputs="text",
)

# Start the Gradio server when the module is executed.
demo.launch()