import math
import os
import re
from datetime import timedelta

# Characters that end a subtitle cue: the CJK full stop, ASCII '?', '!' and ',',
# and a newline. The delimiters themselves are dropped from the output.
# NOTE(review): full-width '？', '！' and '，' are NOT delimiters here -- confirm
# whether that is intended for Chinese input.
_SENTENCE_DELIMITERS = re.compile(r'[。?!,\n]')

# Code points in U+4E00..U+9FA5 are counted as "Chinese characters" for timing.
_CHINESE_CHAR = re.compile(r'[\u4e00-\u9fa5]')

# Timing model, expressed in integer milliseconds so that no float rounding
# error can leak into the rendered timestamps.
_MS_PER_FOUR_CHINESE_CHARS = 700  # every started group of four Chinese chars
_MS_PER_OTHER_CHAR = 200          # every non-Chinese character
_MAX_CUE_MS = 60_000              # a single cue never lasts longer than 1 minute


def split_text_into_sentences(text):
    """Split *text* into non-empty, whitespace-stripped sentences.

    The text is cut at every CJK full stop, ASCII ``?``, ``!``, ``,`` and at
    every newline; the delimiter characters are not kept.

    :param text: the raw text to split
    :return: list of stripped fragments, with empty fragments removed
    """
    fragments = (piece.strip() for piece in _SENTENCE_DELIMITERS.split(text))
    return [fragment for fragment in fragments if fragment]


def _format_srt_timestamp(total_ms):
    """Render an integer millisecond offset as ``HH:MM:SS,mmm``."""
    hours, rest = divmod(total_ms, 3_600_000)
    minutes, rest = divmod(rest, 60_000)
    seconds, millis = divmod(rest, 1000)
    return f"{hours:02d}:{minutes:02d}:{seconds:02d},{millis:03d}"


def generate_srt_content(sentences, start_time=0):
    """Build SRT cue strings for *sentences*, laid out back to back.

    Each cue lasts 0.7 s per started group of four Chinese characters plus
    0.2 s per other character, capped at 60 s. The next cue starts exactly
    where the previous one ends.

    :param sentences: iterable of cue texts, one cue per item
    :param start_time: offset of the first cue in seconds (int or float)
    :return: list of strings of the form
        ``"<index>\\n<start> --> <end>\\n<text>\\n"``, numbered from 1
    """
    srt_content = []
    # Work in integer milliseconds: accumulating float seconds and truncating
    # with int() produced timestamps such as ",399" instead of ",400".
    current_ms = round(start_time * 1000)

    for index, sentence in enumerate(sentences, start=1):
        chinese_char_count = len(_CHINESE_CHAR.findall(sentence))
        other_char_count = len(sentence) - chinese_char_count

        duration_ms = (
            math.ceil(chinese_char_count / 4) * _MS_PER_FOUR_CHINESE_CHARS
            + other_char_count * _MS_PER_OTHER_CHAR
        )
        duration_ms = min(duration_ms, _MAX_CUE_MS)

        start = _format_srt_timestamp(current_ms)
        end = _format_srt_timestamp(current_ms + duration_ms)
        srt_content.append(f"{index}\n{start} --> {end}\n{sentence}\n")

        current_ms += duration_ms

    return srt_content


def txt_to_srt(txt_file_path, output_srt_file_path, start_time=2):
    """Convert a UTF-8 text file into an SRT subtitle file.

    The text is split into sentences, timed by ``generate_srt_content`` and
    written to ``output_with_audio.srt`` inside *output_srt_file_path*.

    :param txt_file_path: path of the input text file (read as UTF-8)
    :param output_srt_file_path: directory that receives the generated file;
        despite the name it is joined with the fixed file name
        ``output_with_audio.srt``, so it must be an existing directory
    :param start_time: offset of the first cue in seconds, default 2
    """
    with open(txt_file_path, 'r', encoding='utf-8') as file:
        text = file.read()

    sentences = split_text_into_sentences(text)
    srt_content = generate_srt_content(sentences, start_time=start_time)
    # Each cue already ends with a newline; joining with another newline
    # yields the blank separator line between cues.
    srt_content_str = '\n'.join(srt_content)

    file_name = 'output_with_audio.srt'
    with open(os.path.join(output_srt_file_path, file_name), 'w', encoding='utf-8') as file:
        file.write(srt_content_str)

    print("SRT文件已生成。")