Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -8,6 +8,12 @@ import torchaudio
|
|
8 |
import torch
|
9 |
from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC
|
10 |
|
|
|
|
|
|
|
|
|
|
|
|
|
11 |
# Use writable cache paths
|
12 |
os.environ['HF_HOME'] = '/app/cache'
|
13 |
os.environ['TORCH_HOME'] = '/app/cache'
|
@@ -67,8 +73,9 @@ async def transcribe(audio: UploadFile = File(...)):
|
|
67 |
with torch.no_grad():
|
68 |
phoneme_logits = phoneme_model(phoneme_inputs).logits
|
69 |
phoneme_ids = torch.argmax(phoneme_logits, dim=-1)
|
70 |
-
|
71 |
-
|
|
|
72 |
# Run speech-to-text model
|
73 |
stt_inputs = stt_processor(waveform.squeeze(), sampling_rate=sample_rate, return_tensors="pt").input_values
|
74 |
with torch.no_grad():
|
|
|
8 |
import torch
|
9 |
from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC
|
10 |
|
11 |
+
import re
|
12 |
+
|
13 |
+
def clean_phoneme_string(raw: str) -> str:
    """Return *raw* with a space placed at every uppercase-uppercase boundary.

    Wherever one ASCII uppercase letter is immediately followed by another,
    a single space is inserted between them. Leading and trailing
    whitespace is then removed from the result.

    >>> clean_phoneme_string("HELLO")
    'H E L L O'
    """
    # Lookbehind + lookahead match the empty gap only, so no letters are
    # consumed or replaced -- a space is simply added in between.
    boundary = re.compile(r"(?<=[A-Z])(?=[A-Z])")
    spaced = boundary.sub(" ", raw)
    return spaced.strip()
|
16 |
+
|
17 |
# Use writable cache paths
|
18 |
os.environ['HF_HOME'] = '/app/cache'
|
19 |
os.environ['TORCH_HOME'] = '/app/cache'
|
|
|
73 |
with torch.no_grad():
|
74 |
phoneme_logits = phoneme_model(phoneme_inputs).logits
|
75 |
phoneme_ids = torch.argmax(phoneme_logits, dim=-1)
|
76 |
+
raw_phonemes = phoneme_processor.decode(phoneme_ids[0])
|
77 |
+
phonemes = clean_phoneme_string(raw_phonemes)
|
78 |
+
|
79 |
# Run speech-to-text model
|
80 |
stt_inputs = stt_processor(waveform.squeeze(), sampling_rate=sample_rate, return_tensors="pt").input_values
|
81 |
with torch.no_grad():
|