greg0rs committed on
Commit
88dc312
·
verified ·
1 Parent(s): 4adedcb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -2
app.py CHANGED
@@ -8,6 +8,12 @@ import torchaudio
8
  import torch
9
  from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC
10
 
 
 
 
 
 
 
11
  # Use writable cache paths
12
  os.environ['HF_HOME'] = '/app/cache'
13
  os.environ['TORCH_HOME'] = '/app/cache'
@@ -67,8 +73,9 @@ async def transcribe(audio: UploadFile = File(...)):
67
  with torch.no_grad():
68
  phoneme_logits = phoneme_model(phoneme_inputs).logits
69
  phoneme_ids = torch.argmax(phoneme_logits, dim=-1)
70
- phonemes = phoneme_processor.decode(phoneme_ids[0])
71
-
 
72
  # Run speech-to-text model
73
  stt_inputs = stt_processor(waveform.squeeze(), sampling_rate=sample_rate, return_tensors="pt").input_values
74
  with torch.no_grad():
 
8
  import torch
9
  from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC
10
 
11
+ import re
12
+
13
+ def clean_phoneme_string(raw: str) -> str:
14
+ """Insert spaces between adjacent uppercase phoneme characters"""
15
+ return re.sub(r"(?<=[A-Z])(?=[A-Z])", " ", raw).strip()
16
+
17
  # Use writable cache paths
18
  os.environ['HF_HOME'] = '/app/cache'
19
  os.environ['TORCH_HOME'] = '/app/cache'
 
73
  with torch.no_grad():
74
  phoneme_logits = phoneme_model(phoneme_inputs).logits
75
  phoneme_ids = torch.argmax(phoneme_logits, dim=-1)
76
+ raw_phonemes = phoneme_processor.decode(phoneme_ids[0])
77
+ phonemes = clean_phoneme_string(raw_phonemes)
78
+
79
  # Run speech-to-text model
80
  stt_inputs = stt_processor(waveform.squeeze(), sampling_rate=sample_rate, return_tensors="pt").input_values
81
  with torch.no_grad():