Spaces:

xujinheng666
/

CS_Quality_Analysis_FinalProject

Running

xujinheng666 committed on Mar 21

Commit

2ba44e2

verified ·

1 Parent(s): 5b9cbca

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -41,6 +41,9 @@ def remove_repeated_phrases(text):
             cleaned_sentences.append(sentence.strip())
     return " ".join(cleaned_sentences)
 def transcribe_audio(audio_path):
     waveform, sample_rate = torchaudio.load(audio_path)
     duration = waveform.shape[1] / sample_rate
@@ -56,11 +59,11 @@ def transcribe_audio(audio_path):
             if os.path.exists(temp_filename):
                 try:
                     result = pipe(temp_filename)["text"]
-                    results.append(result)
                 finally:
                     os.remove(temp_filename)
-        return remove_repeated_phrases(" ".join(results))
-    return remove_repeated_phrases(pipe(audio_path)["text"])
 # Load translation model
 tokenizer = AutoTokenizer.from_pretrained("botisan-ai/mt5-translate-yue-zh")

             cleaned_sentences.append(sentence.strip())
     return " ".join(cleaned_sentences)
+def remove_punctuation(text):
+    return re.sub(r'[^\w\s]', '', text)  # Remove all non-word and non-space characters
 def transcribe_audio(audio_path):
     waveform, sample_rate = torchaudio.load(audio_path)
     duration = waveform.shape[1] / sample_rate
             if os.path.exists(temp_filename):
                 try:
                     result = pipe(temp_filename)["text"]
+                    results.append(remove_punctuation(result))
                 finally:
                     os.remove(temp_filename)
+        return remove_punctuation(remove_repeated_phrases(" ".join(results)))
+    return remove_punctuation(remove_repeated_phrases(pipe(audio_path)["text"]))
 # Load translation model
 tokenizer = AutoTokenizer.from_pretrained("botisan-ai/mt5-translate-yue-zh")