xujinheng666 committed on
Commit
2ba44e2
·
verified ·
1 Parent(s): 5b9cbca

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -3
app.py CHANGED
@@ -41,6 +41,9 @@ def remove_repeated_phrases(text):
41
  cleaned_sentences.append(sentence.strip())
42
  return " ".join(cleaned_sentences)
43
 
 
 
 
44
  def transcribe_audio(audio_path):
45
  waveform, sample_rate = torchaudio.load(audio_path)
46
  duration = waveform.shape[1] / sample_rate
@@ -56,11 +59,11 @@ def transcribe_audio(audio_path):
56
  if os.path.exists(temp_filename):
57
  try:
58
  result = pipe(temp_filename)["text"]
59
- results.append(result)
60
  finally:
61
  os.remove(temp_filename)
62
- return remove_repeated_phrases(" ".join(results))
63
- return remove_repeated_phrases(pipe(audio_path)["text"])
64
 
65
  # Load translation model
66
  tokenizer = AutoTokenizer.from_pretrained("botisan-ai/mt5-translate-yue-zh")
 
41
  cleaned_sentences.append(sentence.strip())
42
  return " ".join(cleaned_sentences)
43
 
44
+ def remove_punctuation(text):
45
+ return re.sub(r'[^\w\s]', '', text) # Remove all non-word and non-space characters
46
+
47
  def transcribe_audio(audio_path):
48
  waveform, sample_rate = torchaudio.load(audio_path)
49
  duration = waveform.shape[1] / sample_rate
 
59
  if os.path.exists(temp_filename):
60
  try:
61
  result = pipe(temp_filename)["text"]
62
+ results.append(remove_punctuation(result))
63
  finally:
64
  os.remove(temp_filename)
65
+ return remove_punctuation(remove_repeated_phrases(" ".join(results)))
66
+ return remove_punctuation(remove_repeated_phrases(pipe(audio_path)["text"]))
67
 
68
  # Load translation model
69
  tokenizer = AutoTokenizer.from_pretrained("botisan-ai/mt5-translate-yue-zh")