carmi committed on
Commit
047dcf7
·
verified ·
1 Parent(s): 3395f36

Upload transcriber.py

Browse files
Files changed (1) hide show
  1. transcriber.py +74 -0
transcriber.py ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import argparse
3
+ from datetime import timedelta
4
+
5
+ import librosa
6
+ import torch
7
+ from faster_whisper import WhisperModel
8
+
9
+
10
def seconds_to_timestamp(seconds):
    """Convert a duration in seconds to a WebVTT timestamp (HH:MM:SS.mmm).

    The value is rounded to the nearest millisecond. Hours are always
    written with at least two digits and the fraction is separated by a
    period, as WebVTT requires (a comma is the SRT convention).

    Args:
        seconds: Non-negative offset in seconds (int or float). Negative
            values are clamped to zero.

    Returns:
        The formatted timestamp string, e.g. ``"00:01:02.500"``.
    """
    # Work in integer milliseconds so whole-second inputs and float rounding
    # cannot change the number of digits that are emitted.
    total_ms = max(0, int(round(seconds * 1000)))
    total_seconds, millis = divmod(total_ms, 1000)
    total_minutes, secs = divmod(total_seconds, 60)
    hours, minutes = divmod(total_minutes, 60)
    return f"{hours:02d}:{minutes:02d}:{secs:02d}.{millis:03d}"
14
+
15
+
16
def write_vtt(segments, output_path):
    """Write transcription segments to ``output_path`` as a WebVTT file.

    Args:
        segments: Iterable of objects exposing ``start`` and ``end``
            (seconds) and ``text`` attributes. It is iterated exactly once,
            so a one-shot generator must not have been consumed beforehand.
        output_path: Destination file path; an existing file is overwritten.
    """
    with open(output_path, 'w', encoding='utf-8') as f:
        f.write("WEBVTT\n\n")
        for segment in segments:
            start_ts = seconds_to_timestamp(segment.start)
            end_ts = seconds_to_timestamp(segment.end)
            # Drop surrounding whitespace so it does not end up in the cue
            # payload (presumably Whisper-style text starts with a space).
            cue_text = segment.text.strip()
            f.write(f"{start_ts} --> {end_ts}\n{cue_text}\n\n")
23
+
24
+
25
def transcribe_audio(
    model,
    audio_path,
    word_timestamps=True,
    vad_filter=True,
    language='ar',
    beam_size=5,
    task="transcribe",
    vad_parameters=None,
):
    """Transcribe one audio file and write the result beside it as a .vtt file.

    The audio is loaded at 16 kHz, passed to ``model.transcribe``, each
    word-level timestamp is printed when available, and the segments are
    written to ``<audio_path without extension>.vtt``.

    Args:
        model: Object exposing ``transcribe(audio, **options)`` that returns
            a ``(segments, info)`` pair (a faster-whisper ``WhisperModel``
            in this script).
        audio_path: Path of the audio file to transcribe.
        word_timestamps: Forwarded to ``model.transcribe``.
        vad_filter: Forwarded to ``model.transcribe``.
        language: Language code forwarded to ``model.transcribe``.
        beam_size: Beam size forwarded to ``model.transcribe``.
        task: Task name forwarded to ``model.transcribe``.
        vad_parameters: Optional VAD settings forwarded to
            ``model.transcribe``; ``None`` leaves the model's defaults.
    """
    print(f"\nProcessing {audio_path}...")
    with torch.no_grad():
        # Passing a target rate makes librosa resample while loading, which
        # replaces the separate load-then-resample step.
        audio_data, _ = librosa.load(audio_path, sr=16000)

        segments, _ = model.transcribe(
            audio_data,
            language=language,
            beam_size=beam_size,
            task=task,
            word_timestamps=word_timestamps,
            vad_filter=vad_filter,
            vad_parameters=vad_parameters,
        )
        # The result is iterated twice below (printing, then writing). If it
        # is a one-shot generator the second pass would see nothing and the
        # VTT file would only contain its header, so materialise it once.
        segments = list(segments)

    for segment in segments:
        if segment.words:
            for word in segment.words:
                print(f"[{word.start:.2f}s -> {word.end:.2f}s] {word.word}")

    vtt_path = os.path.splitext(audio_path)[0] + ".vtt"
    write_vtt(segments, vtt_path)
    print(f"VTT written to: {vtt_path}")
46
+
47
+
48
def _parse_bool(value):
    """Parse a command-line boolean such as ``true``/``false`` or ``1``/``0``."""
    if isinstance(value, bool):
        return value
    normalized = value.strip().lower()
    if normalized in {"true", "t", "yes", "y", "1", "on"}:
        return True
    # The empty string is kept as False: with the previous ``type=bool`` it
    # was the only spelling that produced False.
    if normalized in {"false", "f", "no", "n", "0", "off", ""}:
        return False
    raise argparse.ArgumentTypeError(f"expected a boolean value, got {value!r}")


def main():
    """Command-line entry point: transcribe every wav/mp3 file in a directory.

    Parses ``--model_path``, ``--audio_dir``, ``--word_timestamps`` and
    ``--vad_filter``, loads the model once, then calls ``transcribe_audio``
    for each matching file in ``--audio_dir`` (non-recursive).
    """
    parser = argparse.ArgumentParser(description="Transcribe audio files using Faster-Whisper.")
    parser.add_argument("--model_path", required=True, help="Path to the model directory or file")
    parser.add_argument("--audio_dir", required=True, help="Directory containing audio files (wav/mp3)")
    # ``type=bool`` would turn any non-empty string, including "False", into
    # True, so the flags are parsed explicitly instead.
    parser.add_argument("--word_timestamps", type=_parse_bool, default=True, help="Enable word timestamps (default: True)")
    parser.add_argument("--vad_filter", type=_parse_bool, default=True, help="Enable VAD filtering (default: True)")
    args = parser.parse_args()

    # Fail before the (potentially slow) model load if the directory is wrong.
    if not os.path.isdir(args.audio_dir):
        parser.error(f"--audio_dir is not a directory: {args.audio_dir}")

    model = WhisperModel(args.model_path)

    # Sorted for a deterministic processing order; the extension check is
    # case-insensitive so files such as "CLIP.WAV" are picked up as well.
    for file_name in sorted(os.listdir(args.audio_dir)):
        if not file_name.lower().endswith((".wav", ".mp3")):
            continue
        audio_path = os.path.join(args.audio_dir, file_name)
        # NOTE(review): transcribe_audio must accept every keyword passed
        # here, otherwise this call raises TypeError.
        transcribe_audio(
            model,
            audio_path,
            language="ar",
            beam_size=5,
            task="transcribe",
            word_timestamps=args.word_timestamps,
            vad_filter=args.vad_filter,
            vad_parameters=dict(min_silence_duration_ms=2000),
        )
71
+
72
+
73
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()