doc2video / txt2srt.py
zhao1977's picture
Upload 15 files
dd74184 verified
import re
from datetime import timedelta
import math
import os
def split_text_into_sentences(text):
    """Split raw text into trimmed, non-empty subtitle fragments.

    The text is cut at the ideographic full stop, at question marks,
    exclamation marks and commas (ASCII and full-width forms alike), and at
    newlines. The delimiters themselves are dropped.

    :param text: the text to split
    :return: list of fragments in their original order, each stripped of
        surrounding whitespace; fragments that are empty after stripping
        are omitted
    """
    # Full-width punctuation is spelled as escapes so the pattern cannot be
    # silently folded to ASCII by Unicode normalisation of this file:
    #   \u3002 ideographic full stop, \uff1f / \uff01 / \uff0c full-width ? ! ,
    fragments = re.split(r'[\u3002\uff1f\uff01\uff0c?!,\n]', text)
    trimmed = (fragment.strip() for fragment in fragments)
    return [fragment for fragment in trimmed if fragment]
def _format_srt_timestamp(total_ms):
    """Render an integer millisecond count as an SRT ``HH:MM:SS,mmm`` stamp."""
    whole_seconds, millis = divmod(total_ms, 1000)
    hours, remainder = divmod(whole_seconds, 3600)  # 3600 seconds per hour
    minutes, seconds = divmod(remainder, 60)
    return '{:02d}:{:02d}:{:02d},{:03d}'.format(hours, minutes, seconds, millis)


def generate_srt_content(sentences, start_time=0):
    """Build SRT cue strings for a sequence of sentences.

    Each cue's display time is estimated from its text: every started group
    of four Chinese characters (U+4E00-U+9FA5) counts 0.7 s and every other
    character counts 0.2 s, capped at 60 s per cue. Cues are laid out back to
    back, so each one starts where the previous one ended.

    :param sentences: iterable of subtitle texts, one per cue
    :param start_time: time in seconds at which the first cue starts
    :return: list of strings, one per cue, each of the form
        ``"<index>\\n<start> --> <end>\\n<sentence>\\n"`` with 1-based index
    """
    # All timing is done in integer milliseconds. Working in float seconds
    # and truncating the fraction turned values such as 0.7 + 0.2 into
    # ",899" instead of ",900", and let the error accumulate across cues.
    chinese_group_ms = 700   # per started group of four Chinese characters
    other_char_ms = 200      # per non-Chinese character
    max_cue_ms = 60000       # no single cue may stay up longer than a minute

    srt_content = []
    cursor_ms = round(start_time * 1000)
    for index, sentence in enumerate(sentences, start=1):
        chinese_char_count = len(re.findall(r'[\u4e00-\u9fa5]', sentence))
        other_char_count = len(sentence) - chinese_char_count

        duration_ms = (
            math.ceil(chinese_char_count / 4) * chinese_group_ms
            + other_char_count * other_char_ms
        )
        duration_ms = min(duration_ms, max_cue_ms)

        end_ms = cursor_ms + duration_ms
        start = _format_srt_timestamp(cursor_ms)
        end = _format_srt_timestamp(end_ms)
        srt_content.append(f"{index}\n{start} --> {end}\n{sentence}\n")

        # The next cue begins exactly where this one ends.
        cursor_ms = end_ms
    return srt_content
def txt_to_srt(txt_file_path, output_srt_file_path, start_time=2):
    """Convert a UTF-8 text file into an SRT subtitle file.

    The text is split with ``split_text_into_sentences`` and timed with
    ``generate_srt_content``; the resulting cues are written separated by a
    blank line, and a confirmation message is printed afterwards.

    :param txt_file_path: path of the input text file (read as UTF-8)
    :param output_srt_file_path: directory that receives the subtitle file;
        despite the parameter name this is not the file itself, the output
        is always named ``output_with_audio.srt`` inside this directory
    :param start_time: time in seconds at which the first subtitle starts,
        2 seconds by default
    """
    with open(txt_file_path, 'r', encoding='utf-8') as source:
        text = source.read()

    sentences = split_text_into_sentences(text)
    srt_content = generate_srt_content(sentences, start_time=start_time)

    # Every cue already ends with a newline, so joining with another newline
    # yields the blank separator line between cues.
    target_path = os.path.join(output_srt_file_path, 'output_with_audio.srt')
    with open(target_path, 'w', encoding='utf-8') as target:
        target.write('\n'.join(srt_content))
    print("SRT文件已生成。")