Upload 15 files

- audio2video.py +58 -0
- audio_generate_each_sentence.py +133 -0
- calculate_durations_for_each_image.py +49 -0
- doc_split.py +73 -0
- json2md.py +125 -0
- main.py +208 -0
- markdown_gather.py +152 -0
- marp2image.py +47 -0
- merge_all_videos.py +25 -0
- movie_editor.py +56 -0
- qwen_plus_marp.py +52 -0
- srt2video.py +22 -0
- srt_generate_for_each_sentence.py +93 -0
- theme_generate.py +68 -0
- txt2srt.py +95 -0
audio2video.py
ADDED
@@ -0,0 +1,58 @@
import os
from moviepy.editor import *
from glob import glob
import re
def merge_audio_and_add_to_video(video_path, audio_base_dir, output_path):
    """
    Merge multiple audio files and add them to a video.

    :param video_path: Path to the video file.
    :param audio_base_dir: Base directory containing the audio folders.
    :param output_path: Path of the output video.
    """
    # Load the video file
    video_clip = VideoFileClip(video_path)

    # Initialize the list of audio clips
    audio_clips = []

    silent_audio_start = AudioClip(lambda t: [0, 0], duration=2)
    audio_clips.append(silent_audio_start)

    # Collect all sub-directories and sort them numerically
    audio_dirs = glob(os.path.join(audio_base_dir, "audio_for_paragraph_*"))
    audio_dirs.sort(key=lambda x: int(re.search(r'\d+', os.path.basename(x)).group()))

    # Iterate over all sub-directories
    for audio_dir in audio_dirs:
        # Extract the index of the current directory
        index = int(os.path.basename(audio_dir).split("_")[-1])

        # Collect all mp3 files in the directory and sort them by sentence index
        mp3_files = glob(os.path.join(audio_dir, f"paragraph_{index}_sentence_*.mp3"))
        mp3_files.sort(key=lambda x: int(re.search(r'_sentence_(\d+)', os.path.basename(x)).group(1)))

        # Iterate over the sorted mp3 files
        for mp3_file in mp3_files:
            # Load the audio file
            audio_clip = AudioFileClip(mp3_file)

            # Append to the audio list
            if audio_clips:
                # Insert 0.3 seconds of silence after the previous clip
                # (replaces the earlier AudioNullClip approach)
                silent_audio = AudioClip(lambda t: [0, 0], duration=0.3)
                audio_clips.append(silent_audio)
            audio_clips.append(audio_clip)

    # Concatenate all audio clips
    final_audio = concatenate_audioclips(audio_clips)

    # Attach the merged audio to the video
    video_with_audio = video_clip.set_audio(final_audio)

    # Write the video file with the new audio track
    video_with_audio.write_videofile(output_path, codec='libx264', audio_codec='aac')

    # Close the clip to release resources
    video_clip.close()
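
A minimal usage sketch for merge_audio_and_add_to_video; the paths below are hypothetical and assume the audio_for_paragraph_{i}/paragraph_{i}_sentence_{j}.mp3 layout produced by audio_generate_each_sentence.py:

from audio2video import merge_audio_and_add_to_video

# Hypothetical paths; point them at your own material folder.
merge_audio_and_add_to_video(
    video_path="material/video/section.mp4",
    audio_base_dir="material/audio/section",
    output_path="material/video/section_with_audio.mp4",
)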
audio_generate_each_sentence.py
ADDED
@@ -0,0 +1,133 @@
import json
import os
import re
import time
import dashscope
from dashscope.audio.tts_v2 import SpeechSynthesizer

import traceback

def read_json_file(file_path):
    with open(file_path, 'r', encoding='utf-8') as file:
        data = json.load(file)
    return data


def split_into_sentences(text):
    # Chinese punctuation marks that end a sentence
    punctuation = [',', '。', ';', '?', '!']
    brackets = {'(': ')', '[': ']', '{': '}', '(': ')', '【': '】', '《': '》'}

    # Initialize the result list and a temporary sentence buffer
    sentences = []
    temp_sentence = ''
    bracket_stack = []

    # Walk through every character of the text
    for char in text:
        # Push opening brackets onto the stack
        if char in brackets:
            bracket_stack.append(char)
        # Pop the stack when the matching closing bracket appears
        elif char in brackets.values() and bracket_stack and brackets[bracket_stack[-1]] == char:
            bracket_stack.pop()

        # A punctuation mark outside any brackets marks the end of a sentence
        if char in punctuation and not bracket_stack:
            # Append the buffered sentence to the result list and reset the buffer
            sentences.append(temp_sentence.strip())
            temp_sentence = ''
        else:
            # Otherwise keep accumulating characters
            temp_sentence += char

    # Handle a trailing sentence that has no closing punctuation
    if temp_sentence:
        sentences.append(temp_sentence.strip())

    return sentences


def save_sentences_to_markdown(sentences, base_dir, index1):
    for index2, sentence in enumerate(sentences, start=1):
        # Create the directory
        dir_name = f'audio_for_paragraph_{index1}'
        dir_path = os.path.join(base_dir, dir_name)
        os.makedirs(dir_path, exist_ok=True)

        # Build the file name
        file_name = f'paragraph_{index1}_sentence_{index2}.md'
        file_path = os.path.join(dir_path, file_name)

        # Write the Markdown file
        with open(file_path, 'w', encoding='utf-8') as file:
            file.write(sentence + '\n')


def process_json_file(json_file_path, base_dir):

    if not os.path.exists(base_dir):
        os.makedirs(base_dir)

    file_prefix = os.path.splitext(os.path.basename(json_file_path))[0]

    base_dir = os.path.join(base_dir, file_prefix)

    # Read the JSON file
    json_data = read_json_file(json_file_path)

    # Process every entry in the JSON data
    for index1, item in enumerate(json_data):
        if 'content' in item:
            content = item['content']
            # Skip entries whose content is a link
            if not is_url(content):
                sentences = split_into_sentences(content)
                save_sentences_to_markdown(sentences, base_dir, index1 + 1)

def is_url(s):
    url_pattern = re.compile(r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\\(\\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+')
    return bool(url_pattern.match(s))


def synthesize_md_to_speech(base_directory):
    """
    Find all .md files under the given directory, read their contents, convert them to
    speech via the DashScope API, and save each result as a .mp3 file with the same
    name in the same directory.

    Parameters:
        base_directory (str): Top-level directory containing the .md files.
    """
    # Make sure the DashScope API key is present in the environment
    if 'DASHSCOPE_API_KEY' not in os.environ:
        raise ValueError("DashScope API key must be set in the environment variables.")

    # Walk the directory and its sub-directories
    for root, dirs, files in os.walk(base_directory):
        for file in files:
            if file.endswith('.md'):
                # Build the full file path
                md_file_path = os.path.join(root, file)

                # Read the .md file content
                with open(md_file_path, 'r', encoding='utf-8') as f:
                    text = f.read()

                # Initialize the speech synthesizer
                speech_synthesizer = SpeechSynthesizer(model='cosyvoice-v1', voice='longxiaochun')

                # Synthesize speech
                audio_data = speech_synthesizer.call(text)

                # Build the output .mp3 file path
                mp3_file_path = os.path.splitext(md_file_path)[0] + '.mp3'

                # Save the audio to file
                with open(mp3_file_path, 'wb') as f:
                    f.write(audio_data)

                print(f'Synthesized text from file "{md_file_path}" to file: {mp3_file_path}')
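
A sketch of how the two entry points chain together, assuming DASHSCOPE_API_KEY is set in the environment and a hypothetical section.json produced by doc_split.py:

from audio_generate_each_sentence import process_json_file, synthesize_md_to_speech

# Split each paragraph into sentence-level .md files under material/audio/section/...
process_json_file("material/json/section.json", "material/audio")
# ...then synthesize one .mp3 per sentence via CosyVoice (requires DASHSCOPE_API_KEY).
synthesize_md_to_speech("material/audio/section")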
calculate_durations_for_each_image.py
ADDED
@@ -0,0 +1,49 @@
import os
from pydub import AudioSegment

def calculate_audio_durations(directory):
    """
    Calculate the total duration (in seconds) of the mp3 files in every folder named
    audio_for_paragraph_{index} under the given directory.

    Parameters:
        directory (str): Root directory to scan.

    Returns:
        list: Total mp3 duration (in seconds) for each audio_for_paragraph_{index} folder.
    """
    # Initialize the result list
    durations = []

    # Iterate over all sub-directories
    for entry in os.scandir(directory):
        if entry.is_dir() and entry.name.startswith("audio_for_paragraph_"):
            # Extract the index
            index = int(entry.name.split("_")[-1])

            # Initialize the current folder's total duration to 0
            total_duration_ms = 0

            # Iterate over all files in the sub-directory
            for file_entry in os.scandir(entry.path):
                if file_entry.name.endswith(".mp3"):
                    # Load the mp3 file and accumulate its duration
                    audio = AudioSegment.from_mp3(file_entry.path)
                    delay = 300
                    total_duration_ms += len(audio) + delay

            # Convert the folder's total duration to seconds and append it to the result list
            total_duration_seconds = total_duration_ms / 1000.0
            durations.append((index, total_duration_seconds))

    # Sort the result list by index
    durations.sort(key=lambda x: x[0])

    # Keep only the durations (in seconds)
    durations = [duration for _, duration in durations]

    durations.insert(0, 2)

    return durations
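
A short sketch of the expected output, with a hypothetical material/audio/section directory; the numbers in the comment are illustrative only:

from calculate_durations_for_each_image import calculate_audio_durations

durations = calculate_audio_durations("material/audio/section")
# durations[0] is the fixed 2-second slot for the title page; each following entry is
# a paragraph's mp3 total plus 300 ms per sentence, e.g. [2, 14.7, 9.4].
print(durations)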
doc_split.py
ADDED
@@ -0,0 +1,73 @@
from http import HTTPStatus
import dashscope
import json
import os
def doc_split_with_qwen_plus(input_filepath, output_filepath):

    if not os.path.exists(output_filepath):
        os.makedirs(output_filepath)

    with open(input_filepath, 'r', encoding='utf-8') as file:
        content = file.read()

    prompt = """
    执行文档处理任务,包括分段与自动生成段落标题,需遵循以下具体细则:

    1. **分段逻辑**:仔细分析文档内容,根据其内在语义逻辑合理划分段落。

    2. **标题创作**:为每一独立段落设计一个精炼标题,确保该标题简洁明了(不超过10个字),并能有效准确地概括该段落核心信息。

    3. **输出规格**:完成处理后,生成的文档结构需符合JSON格式标准,每段落及对应的标题组成一个条目,具体格式示例如下:

    [
        {"title": " ", "content": " "},
        {"title": " ", "content": " "},
        ...
    ]

    输出内容是以"["开头,并以"]"收尾的JSON数据,请不要输出其他内容。

    4. **原文忠实性**:在输出的JSON数据中,各段落的“content”字段必须精确匹配原始文档的文字内容,不得有增删改动。必须完整地处理原始文档的全部内容,不能有遗漏。请严格保证文字和链接在原文档中的相对位置保持不变。

    5. **格式化链接**:对于文档中的markdown格式的图片链接,将他们单独保存到JSON条目中。其"title"为"链接{index}","content"为链接地址,其中index为索引顺序。

    6. **内容限制**:输出内容中不得包含任何多余的空格、换行符、制表符等空白字符,也不得包含任何HTML、XML、Markdown等格式的符号。始终保持中文。

    请严格依据上述要求执行文档处理任务。

    文档内容如下:
    """

    messages = [{
        'role': 'user',
        'content': f"""
        "{prompt}"
        "{content}"
        """
    }]

    response_content = ''
    responses = dashscope.Generation.call("qwen-plus",
                                          messages=messages,
                                          result_format='message',
                                          stream=True,
                                          incremental_output=True)

    for response in responses:
        if response.status_code == HTTPStatus.OK:
            response_content += response.output.choices[0]['message']['content']
        else:
            print('Request id: %s, Status code: %s, error code: %s, error message: %s' % (
                response.request_id, response.status_code,
                response.code, response.message
            ))
    # Strip a fenced ```json ... ``` wrapper if the model added one
    if response_content.startswith("```") and response_content.endswith("```"):
        response_content = response_content[8:-3].strip()

    input_base_name = os.path.splitext(os.path.basename(input_filepath))[0]
    output_file_path = os.path.join(output_filepath, f'{input_base_name}.json')
    with open(output_file_path, 'w', encoding='utf-8') as json_file:
        json.dump(json.loads(response_content), json_file, ensure_ascii=False, indent=4)

    return response_content
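
A sketch of calling the splitter on its own, assuming the DashScope SDK picks up DASHSCOPE_API_KEY from the environment and that the hypothetical input file exists:

from doc_split import doc_split_with_qwen_plus

# Writes material/json/section.json and also returns the raw JSON string.
raw_json = doc_split_with_qwen_plus("input/section.txt", "material/json")
print(raw_json[:200])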
json2md.py
ADDED
@@ -0,0 +1,125 @@
import json
import os
import re
from qwen_plus_marp import call_with_stream
from pathlib import Path

def is_link(text):
    """Check whether the given text is a link."""
    return text.startswith("http://") or text.startswith("https://")

def parse_json_list_to_markdown(json_list, theme_url):
    """
    Convert a list of JSON objects to Markdown, processing each 'content' through call_with_stream.

    Parameters:
    - json_list (list): List of dicts, each containing 'title' and 'content' keys.

    Returns:
    - str: The Markdown-formatted string.
    """
    theme = "\n\n"
    theme_filename = Path(theme_url).name
    markdown_content = ""
    for item in json_list:
        title = item.get('title', '未命名')
        processed_content = call_with_stream(item.get('content', ''))

        if is_link(processed_content):
            # Render a link entry as its own background-image slide
            markdown_content += f"---\n\n![bg]({processed_content})\n\n---"
        else:
            markdown_content += f"\n\n# {title}\n\n{processed_content}\n\n---"
    return markdown_content


def parse_json_list_to_markdown_new(json_list, theme_url):
    """
    Convert a list of JSON objects to Markdown, processing each 'content' through call_with_stream.

    Parameters:
    - json_list (list): List of dicts, each containing 'title' and 'content' keys.

    Returns:
    - str: The Markdown-formatted string.
    """
    theme_filename = Path(theme_url).name
    # Marp background-image markdown pointing at the theme file
    theme = f"![bg](./{theme_filename})\n\n"

    markdown_content = ""

    for i, item in enumerate(json_list):
        title = item.get('title', '未命名')
        processed_content = call_with_stream(item.get('content', ''))
        # Strip a fenced ```markdown ... ``` wrapper if the model added one
        if processed_content.startswith("```") and processed_content.endswith("```"):
            processed_content = processed_content[11:-3].strip()

        if not is_link(json_list[i].get('content')):
            # Last element of the list
            if i == len(json_list) - 1:
                markdown_content += f"\n\n## {title}\n\n{processed_content}\n\n{theme}\n\n---"
            else:
                if not is_link(json_list[i + 1].get('content')):
                    # Current item is not a link and the next one is not a link either
                    markdown_content += f"\n\n## {title}\n\n{processed_content}\n\n{theme}\n\n---"
                else:
                    # Current item is not a link but the next one is
                    markdown_content += f"\n\n## {title}\n\n{processed_content}\n\n---"
        else:
            # Current item is a link: render it as a background-image slide
            markdown_content += f"---\n\n![bg]({json_list[i].get('content')})\n\n---"

    return markdown_content


def convert_json_file_to_md(json_file_path, output_dir, theme_url):
    """
    Read a JSON file, transform its content via call_with_stream, and save the result as a Markdown file.

    Parameters:
    - json_file_path (str): Path of the JSON file.
    - output_dir (str): Directory where the Markdown file will be saved.
    """

    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    with open(json_file_path, 'r', encoding='utf-8') as file:
        json_data = json.load(file)

    markdown_content = parse_json_list_to_markdown_new(json_data, theme_url)

    base_name = os.path.splitext(os.path.basename(json_file_path))[0]
    md_file_name = f"{base_name}.md"
    output_path = os.path.join(output_dir, md_file_name)

    with open(output_path, 'w', encoding='utf-8') as file:
        file.write(markdown_content)

def save_markdown_to_file(content, filename):
    """Save Markdown content to a file."""
    with open(filename, 'w', encoding='utf-8') as file:
        file.write(content)

def process_markdown(input_file):
    """Split the Markdown text as required and save the parts."""
    with open(input_file, 'r', encoding='utf-8') as file:
        input_text = file.read()

    # Use a regular expression so that every part keeps its trailing "---"
    parts = re.split(r'(?<=---\n)', input_text)

    # Drop empty parts
    parts = [part.strip() for part in parts if part.strip()]

    filenames = []
    base_path = os.path.dirname(input_file)  # directory of the base file

    for i, part in enumerate(parts):
        # Build the file name
        filename = f'{os.path.splitext(os.path.basename(input_file))[0]}_{i}.md'
        # Build the full path
        full_filename = os.path.join(base_path, filename)
        save_markdown_to_file(f'{part}', full_filename)
        filenames.append(full_filename)
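
A sketch of the JSON-to-Markdown step in isolation, assuming a hypothetical material/json/section.json from the previous step and the bundled style/theme.png:

from json2md import convert_json_file_to_md, process_markdown

# Writes material/markdown/section.md with one summarized slide per paragraph.
convert_json_file_to_md("material/json/section.json", "material/markdown", "./style/theme.png")
# Then split the merged deck into section_0.md, section_1.md, ... next to it.
process_markdown("material/markdown/section.md")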
main.py
ADDED
@@ -0,0 +1,208 @@
import argparse
import datetime
import os
import shutil
import gradio as gr
from threading import Thread

# Import the project modules
from doc_split import doc_split_with_qwen_plus
from json2md import convert_json_file_to_md, process_markdown
from markdown_gather import merge_style_with_md_files, remove_trailing_dashes, insert_logo, remove_empty_lines, title_to_md
from marp2image import convert_md_files_to_png
from audio_generate_each_sentence import process_json_file, synthesize_md_to_speech
from srt_generate_for_each_sentence import generate_srt_from_audio
from calculate_durations_for_each_image import calculate_audio_durations
from movie_editor import images_to_video_with_durations
from audio2video import merge_audio_and_add_to_video
from srt2video import merge_video_and_subtitle
from theme_generate import theme_generate_with_qwen_plus

def main(args, progress_callback, log_callback):
    try:
        # Record the start time
        start_time = datetime.datetime.now()
        progress_callback(f"开始时间: {start_time.strftime('%Y年%m月%d日 %H时%M分%S秒')}")
        log_callback(f"开始时间: {start_time.strftime('%Y年%m月%d日 %H时%M分%S秒')}")

        # Build a folder name carrying a timestamp
        timestamp = start_time.strftime("%Y%m%d_%H%M%S")
        material_folder = f"material_{timestamp}"

        # Create the output path; the rendering material is copied below this path
        input_base_name = os.path.splitext(os.path.basename(args.input_txt_path))[0]

        # Update the individual paths to use the new folder name
        args.json_path = os.path.join(material_folder, "json")
        args.image_path = os.path.join(material_folder, "image")
        args.audio_path = os.path.join(material_folder, "audio")
        args.markdown_path = os.path.join(material_folder, "markdown")
        args.srt_and_video_path = os.path.join(material_folder, "video")

        # Create the required folders
        folders_to_create = [
            material_folder, args.markdown_path, args.json_path,
            args.image_path, args.audio_path, args.srt_and_video_path
        ]
        for folder in folders_to_create:
            if not os.path.exists(folder):
                os.makedirs(folder)
                log_callback(f"创建文件夹: {folder}")

        # Copy the style files
        if os.path.exists(args.input_style_path):
            for filename in os.listdir(args.input_style_path):
                full_path = os.path.join(args.input_style_path, filename)
                if os.path.isfile(full_path):
                    shutil.copy2(full_path, args.markdown_path)
            log_callback(f"样式文件已复制到: {args.markdown_path}")
        else:
            log_callback(f"警告: 输入样式文件夹路径 {args.input_style_path} 不存在,跳过复制操作。", is_warning=True)

        # Call Qwen-Plus via the API to generate a document title for the input document
        theme = theme_generate_with_qwen_plus(args.input_txt_path, args.title)
        progress_callback(f"生成的文档标题: {theme}")
        log_callback(f"生成的文档标题: {theme}")

        # Call Qwen-Plus via the API to split the input document into paragraphs and generate a title for each paragraph
        doc_split_with_qwen_plus(args.input_txt_path, args.json_path)
        progress_callback(f"文档已分割并保存到: {args.json_path}")
        log_callback(f"文档已分割并保存到: {args.json_path}")

        # Summarize each paragraph, save it as Markdown, and set the background image;
        # you can replace style/theme.png with a custom background
        for filename in os.listdir(args.json_path):
            if filename.endswith('.json'):
                json_file_path = os.path.join(args.json_path, filename)
                convert_json_file_to_md(json_file_path, args.markdown_path, args.theme_path)
                log_callback(f"转换 {json_file_path} 到 Markdown 格式并保存到: {args.markdown_path}")

        # Add the document title to the top of the Markdown file as a title page and set its background;
        # you can replace style/title.png with a custom title-page background
        title_to_md(os.path.join(args.markdown_path, f'{input_base_name}.md'), theme, args.title_path)
        log_callback(f"文档标题已添加到 Markdown 文件: {os.path.join(args.markdown_path, f'{input_base_name}.md')}")

        # Remove empty lines so the file complies with the Marp format
        remove_empty_lines(os.path.join(args.markdown_path, f'{input_base_name}.md'))
        log_callback(f"删除空行: {os.path.join(args.markdown_path, f'{input_base_name}.md')}")

        # Add the Alibaba Cloud logo; replace it by naming your own image logo.png and placing it in the style folder
        insert_logo(os.path.join(args.markdown_path, f'{input_base_name}.md'), os.path.join(args.logo_path))
        log_callback(f"插入logo到 Markdown 文件: {os.path.join(args.markdown_path, f'{input_base_name}.md')}")

        process_markdown(os.path.join(args.markdown_path, f'{input_base_name}.md'))
        log_callback(f"处理 Markdown 文件: {os.path.join(args.markdown_path, f'{input_base_name}.md')}")

        # Define and add the Marp style file; see the Marp documentation for customization:
        # name the style file style.md and place it in the style folder
        merge_style_with_md_files(args.markdown_path, args.markdown_style_path)
        log_callback(f"合并样式文件到 Markdown 文件: {args.markdown_path}")

        # Remove the trailing "---" from the Markdown files to avoid generating blank images
        remove_trailing_dashes(args.markdown_path)
        log_callback(f"删除 Markdown 文件末尾的 '---': {args.markdown_path}")

        # Use Marp to render the presentation images
        convert_md_files_to_png(os.path.join(args.markdown_path, f'{input_base_name}.md'), args.image_path)
        log_callback(f"生成图片: {args.image_path}")

        # Split each paragraph into sentences and synthesize speech via the CosyVoice API
        process_json_file(os.path.join(args.json_path, f'{input_base_name}.json'), args.audio_path)
        synthesize_md_to_speech(os.path.join(args.audio_path, input_base_name))
        log_callback(f"合成语音: {args.audio_path}")

        # Generate the SRT subtitle file
        generate_srt_from_audio(os.path.join(args.audio_path, input_base_name), args.srt_and_video_path,
                                os.path.join(args.srt_and_video_path, input_base_name))
        log_callback(f"生成SRT字幕文件: {args.srt_and_video_path}")

        # Compute the total audio duration of each paragraph
        durations = calculate_audio_durations(os.path.join(args.audio_path, input_base_name))
        log_callback(f"计算音频时长: {durations}")

        # Cut all images into a video
        images_to_video_with_durations(os.path.join(args.image_path, f'{input_base_name}'), args.srt_and_video_path,
                                       durations, args.fps, input_base_name)
        log_callback(f"生成视频: {args.srt_and_video_path}")

        # Embed the audio files into the video
        merge_audio_and_add_to_video(os.path.join(args.srt_and_video_path, f'{input_base_name}.mp4'),
                                     os.path.join(args.audio_path, f'{input_base_name}'),
                                     os.path.join(args.srt_and_video_path, f'{input_base_name}_with_audio.mp4'))
        log_callback(f"合并音频到视频: {args.srt_and_video_path}")

        # Embed the subtitle file into the video
        merge_video_and_subtitle(args.srt_and_video_path, input_base_name)
        log_callback(f"合并字幕到视频: {args.srt_and_video_path}")

        # Record the end time
        end_time = datetime.datetime.now()
        progress_callback(f"结束时间: {end_time.strftime('%Y年%m月%d日 %H时%M分%S秒')}")
        log_callback(f"结束时间: {end_time.strftime('%Y年%m月%d日 %H时%M分%S秒')}")

        # Compute the total elapsed time
        elapsed_time = end_time - start_time
        elapsed_hours, remainder = divmod(elapsed_time.total_seconds(), 3600)
        elapsed_minutes, elapsed_seconds = divmod(remainder, 60)

        elapsed_time_str = f"{int(elapsed_hours)}时{int(elapsed_minutes)}分{int(elapsed_seconds)}秒"
        progress_callback(f"总时间: {elapsed_time_str}")
        log_callback(f"总时间: {elapsed_time_str}")

    except Exception as e:
        log_callback(f"发生错误: {str(e)}", is_error=True)
        progress_callback(f"发生错误: {str(e)}", is_error=True)
        raise e

def run_conversion(input_txt_path, fps, title):
    args = argparse.Namespace(
        input_txt_path=input_txt_path,
        input_style_path='./style',
        markdown_style_path='./style/style.md',
        logo_path='./style/logo.png',
        theme_path='./style/theme.png',
        title_path='./style/title.png',
        json_path='./material/json',
        image_path='./material/image',
        audio_path='./material/audio',
        markdown_path='./material/markdown',
        srt_and_video_path='./material/video',
        fps=int(fps),
        title=title
    )

    log_text = []

    def progress_callback(message, is_error=False):
        log_text.append(message)
        if is_error:
            log_text.append(f"错误: {message}")

    def log_callback(message, is_warning=False, is_error=False):
        log_text.append(message)
        if is_warning:
            log_text.append(f"警告: {message}")
        elif is_error:
            log_text.append(f"错误: {message}")

    def video_generation_done():
        log_text.append("视频生成成功!")

    main(args, progress_callback, log_callback)
    video_generation_done()
    return "\n".join(log_text)

def gradio_interface():
    iface = gr.Interface(
        fn=run_conversion,
        inputs=[
            gr.Textbox(lines=1, placeholder="输入文本路径", label="输入文本路径"),
            gr.Number(value=30, label="帧率"),
            gr.Textbox(lines=1, placeholder="视频标题", label="视频标题")
        ],
        outputs=gr.Textbox(label="日志输出"),
        title="文档生成视频",
        description="将文档转换为带有音频和字幕的视频。",
        live=False
    )
    iface.launch(share=True)  # share=True creates a public link

if __name__ == "__main__":
    gradio_interface()
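
A sketch of launching the app; judging from the imports and subprocess calls above, this assumes DASHSCOPE_API_KEY is exported and that the marp and ffmpeg executables are on PATH:

from main import gradio_interface

# Starts the Gradio UI (input text path, frame rate, video title) with share=True.
gradio_interface()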
markdown_gather.py
ADDED
@@ -0,0 +1,152 @@
import os
import re
from pathlib import Path


def merge_style_with_md_files(md_file_path, style_file_path):
    # Check that the style file exists
    if not os.path.isfile(style_file_path):
        raise FileNotFoundError(f"样式文件 {style_file_path} 不存在。")

    # Read the style file content
    with open(style_file_path, 'r', encoding='utf-8') as f:
        style_content = f.read()

    # Iterate over all files in the given directory
    for filename in os.listdir(md_file_path):
        if filename.startswith('section') and filename.endswith('.md'):
            file_path = os.path.join(md_file_path, filename)
            # Merge the style content with the .md file content
            if os.path.exists(file_path):
                with open(file_path, 'r+', encoding='utf-8') as f:
                    original_content = f.read()
                    # Move the pointer back to the start of the file to overwrite the old content
                    f.seek(0)
                    f.write(style_content + '\n\n' + original_content)
                    # Truncate so no stale data remains after the new content
                    f.truncate()


def remove_trailing_dashes(directory):
    """
    Remove consecutive dashes (---) that sit at the very end of a Markdown file
    with nothing after them (except possibly newlines).
    """
    for filename in os.listdir(directory):
        # Only handle .md files whose names start with "section"
        if filename.startswith('section') and filename.endswith('.md'):
            filepath = os.path.join(directory, filename)

            # Read the file content
            with open(filepath, 'r', encoding='utf-8') as file:
                content = file.read()

            # Check whether the file ends with consecutive dashes (---) followed only by newlines
            if content.rstrip().endswith('---') and content.rstrip('---').endswith('\n'):
                # Remove the trailing dashes and the newlines after them
                content = content.rstrip('---\n')

            # Replace every "------" in the file with an empty string
            content = content.replace("------", "")
            content = re.sub(r'\n{3,}', '\n\n', content)

            # Write the updated content back
            with open(filepath, 'w', encoding='utf-8') as file:
                file.write(content)


def remove_empty_lines(filename):
    # Read the file content
    with open(filename, 'r', encoding='utf-8') as file:
        content = file.read()
    # Replace every "------" in the file with an empty string
    content = content.replace("------", "")
    with open(filename, 'w', encoding='utf-8') as file:
        file.write(content)


def append_string_to_file(file_path):
    # Read the file content
    with open(file_path, 'r', encoding='utf-8') as file:
        content = file.read()

    # Append the string '---' at the end of the file
    new_content = content + '---'

    # Write the new content
    with open(file_path, 'w', encoding='utf-8') as file:
        file.write(new_content)


def insert_logo(file_path, logo_path):
    # Read the file content
    with open(file_path, 'r', encoding='utf-8') as file:
        content = file.read()

    # Get the logo file name
    logo_filename = Path(logo_path).name

    # Define the string to insert
    insert_str = f"""<!--\nbackgroundImage: url("./{logo_filename}");\nbackgroundSize: 10% ;\nbackgroundPosition: 98% 3% ;\n-->
"""

    # Use a regular expression to insert the snippet:
    # only replace stand-alone "---", not dashes surrounded by other dashes
    new_content = re.sub(r'(?<!-)---(?!-)', f'\n{insert_str}\n---', content, flags=re.DOTALL)

    # Write the new content
    with open(file_path, 'w', encoding='utf-8') as file:
        file.write(new_content)


def insert_bg_if_no_link(filename, theme_url):
    # List used to accumulate the final result
    result = []

    # Read the file content
    with open(filename, 'r', encoding='utf-8') as file:
        content = file.read()

    # Get the theme file name
    theme_filename = Path(theme_url).name

    # Split the text with a regular expression
    sections = re.split(r'---+', content)

    # Iterate over every section
    for i, section in enumerate(sections):
        # Look for a "# {...}" style header
        header_match = re.search(r'# \{(.+?)\}', section)
        if header_match:
            # Extract the content after the header
            header_content = section[header_match.end():].strip()

            # Check whether this part already contains an image link in markdown format
            if not re.search(r'!\[[^\]]*\]\([^\)]*\)', header_content):
                # If there is neither a link nor a background image, add the theme background after the header
                section = f'{section[:header_match.end()]}\n![bg](./{theme_filename}){section[header_match.end():]}\n---'

        # Append the processed section to the result list
        result.append(section)

        # Keep the original separator unless this is the last section
        if i < len(sections) - 1:
            result.append('---')

    # Write the result back to the file
    with open(filename, 'w', encoding='utf-8') as file:
        file.write(''.join(result))


def title_to_md(file_path, content, title_url):
    # Read the original file content
    title_filename = Path(title_url).name
    with open(file_path, 'r', encoding='utf-8') as file:
        original_content = file.read()
    content = f"![bg](./{title_filename})\n# {content}\n---"
    # Add the new content at the beginning
    new_content = content + '\n' + original_content

    # Write the new content
    with open(file_path, 'w', encoding='utf-8') as file:
        file.write(new_content)
marp2image.py
ADDED
@@ -0,0 +1,47 @@
import os
import re
import subprocess

def convert_md_files_to_png(md_file_path, output_base_dir="./Marp/"):
    """
    Read all files named {base_name}_{index}.md in the same directory as md_file_path,
    iterate over them in order of index, and use Marp to render each one directly to a
    PNG image.

    :param md_file_path: Full path of the MD file.
    :param output_base_dir: Base output directory; a sub-directory is created in it for the output files.
    """

    if not os.path.exists(output_base_dir):
        os.makedirs(output_base_dir)

    # Get the MD file name (without the .md suffix)
    base_name = os.path.splitext(os.path.basename(md_file_path))[0]
    directory = os.path.dirname(md_file_path)

    # Collect all matching file names
    md_files = [f for f in os.listdir(directory) if f.startswith(base_name + '_') and f.endswith('.md')]
    md_files.sort()  # sort by file name

    # Create the output directory, named after the MD file
    output_dir = os.path.join(output_base_dir, base_name)
    os.makedirs(output_dir, exist_ok=True)  # does not raise if the directory already exists

    for md_file in md_files:
        md_file_path = os.path.join(directory, md_file)
        base = os.path.splitext(os.path.basename(md_file_path))[0]
        match = re.match(rf"{re.escape(base_name)}_(\d+)", base)
        if match:
            index = int(match.group(1))  # extract the index

            # Build the Marp CLI command
            command = ["marp", "--html", "--allow-local-files",
                       "--output", os.path.join(output_dir, f"{base_name}_{index}.png"),
                       "--format", "png",
                       md_file_path]

            try:
                # Run the command to convert the MD file to PNG
                subprocess.run(command, check=True)
                print(f"成功将 '{md_file_path}' 转换为PNG并保存至 '{output_dir}'。")
            except subprocess.CalledProcessError as e:
                print(f"转换 '{md_file_path}' 时发生错误: {e}")
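
A sketch of the rendering step on its own, assuming the Marp CLI is installed and that process_markdown has already written section_0.md, section_1.md, ... next to the hypothetical section.md:

from marp2image import convert_md_files_to_png

# Renders material/image/section/section_0.png, section_1.png, ... via the marp CLI.
convert_md_files_to_png("material/markdown/section.md", "material/image")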
merge_all_videos.py
ADDED
@@ -0,0 +1,25 @@
import os
import re
from moviepy.editor import VideoFileClip, concatenate_videoclips

def merge_videos(input_directory):
    # Pattern of the video files to merge
    video_pattern = r"section_(\d+)_with_audio_with_subs\.mp4"

    # Find the matching files and sort them by index
    files = sorted(
        (fn for fn in os.listdir(input_directory) if re.match(video_pattern, fn)),
        key=lambda x: int(re.match(video_pattern, x).group(1))
    )

    # Load all video clips
    clips = [VideoFileClip(os.path.join(input_directory, file)) for file in files]

    # Concatenate all video clips
    final_clip = concatenate_videoclips(clips)

    # Write the merged video
    output_path = os.path.join(input_directory, 'output_merge_all_video.mp4')
    final_clip.write_videofile(output_path, audio_codec='aac')

merge_videos("./material/video")
movie_editor.py
ADDED
@@ -0,0 +1,56 @@
from moviepy.editor import ImageClip, ColorClip, concatenate_videoclips, CompositeVideoClip
import os
import re
import natsort
from PIL import Image
import numpy as np
from concurrent.futures import ThreadPoolExecutor

def process_image(file, duration, target_size):
    with Image.open(file) as img:
        width, height = img.size
        ratio = width / height
        new_width, new_height = target_size

        # Scale the image down, preserving its aspect ratio, if it exceeds the target size
        if width > target_size[0] or height > target_size[1]:
            if ratio > target_size[0] / target_size[1]:
                new_width = target_size[0]
                new_height = int(new_width / ratio)
            else:
                new_height = target_size[1]
                new_width = int(new_height * ratio)

        resized_img = img.resize((new_width, new_height), resample=Image.Resampling.LANCZOS)
        img_clip = ImageClip(np.array(resized_img)).set_duration(duration).set_position('center')
        return img_clip

def images_to_video_with_durations(input_image_path, output_video_path, durations, fps, base_name):
    # Collect all matching images and sort them by the number in the file name
    pattern = re.compile(rf'^{re.escape(base_name)}_(\d+)\.png$')
    image_files = [
        os.path.join(input_image_path, file)
        for file in os.listdir(input_image_path)
        if pattern.match(file)
    ]
    image_files = natsort.natsorted(image_files, key=lambda x: int(pattern.match(os.path.basename(x)).group(1)))

    # Background size of the video
    target_size = (1280, 720)

    # Create the background clip
    bg_clip = ColorClip(size=target_size, color=(255, 255, 255), duration=sum(durations))

    # Process the image files with a thread pool
    with ThreadPoolExecutor() as executor:
        clips = list(executor.map(process_image, image_files, durations, [target_size] * len(image_files)))

    # Overlay every clip on the background
    composite_clips = [CompositeVideoClip([bg_clip.subclip(sum(durations[:i]), sum(durations[:i+1])), clip])
                       for i, clip in enumerate(clips)]

    # Concatenate all clips with concatenate_videoclips
    final_clip = concatenate_videoclips(composite_clips, method="compose")

    # Write the video file
    output_filename = f"{base_name}.mp4"
    final_clip.write_videofile(os.path.join(output_video_path, output_filename), fps=fps)
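
A sketch combining the duration calculation with the slideshow rendering, using the same hypothetical section material as above; the function expects one PNG per entry in durations:

from calculate_durations_for_each_image import calculate_audio_durations
from movie_editor import images_to_video_with_durations

durations = calculate_audio_durations("material/audio/section")
# Writes material/video/section.mp4, holding each slide for its paragraph's audio length.
images_to_video_with_durations("material/image/section", "material/video",
                               durations, fps=30, base_name="section")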
qwen_plus_marp.py
ADDED
@@ -0,0 +1,52 @@
import asyncio

import os
from http import HTTPStatus
import dashscope

def call_with_stream(content):
    prompt2 = """
    请提取下列文字的主要要点,并将这些要点以Markdown格式输出,请输出中文。
    如果文字内容为链接,请直接以Markdown格式输出该链接。
    请严格按照Markdown格式输出代码段,避免输出其他内容,避免出现'''之类的符号。
    但是Markdown文字内容简洁。
    Markdown代码段中,每一个小标题下的的内容行数禁止超过3行。
    “- 内容”之后不要再分段落描述。

    示例输入:你有过使用搜索引擎搜索问题却怎么也找不到有效信息的时候吗?
    示例输出:- 使用搜索引擎遇到的问题:难以找到有效信息

    示例输入:https://alidocs.oss-cn-zhangjiakou.aliyuncs.com/res/54Lq3RNeD78gn7Ed/img/6f9f3049-78a2-46b3-a052-88792052890d.png
    示例输出:https://alidocs.oss-cn-zhangjiakou.aliyuncs.com/res/54Lq3RNeD78gn7Ed/img/6f9f3049-78a2-46b3-a052-88792052890d.png

    以下是待提炼的文字内容:

    """

    messages = [{
        'role': 'user',
        'content': f"""
        "{prompt2}"
        "{content}"
        """
    }]

    response_content = ''
    responses = dashscope.Generation.call("qwen-plus",
                                          messages=messages,
                                          result_format='message',
                                          stream=True,
                                          incremental_output=True)

    for response in responses:
        if response.status_code == HTTPStatus.OK:
            response_content += response.output.choices[0]['message']['content']
        else:
            print('Request id: %s, Status code: %s, error code: %s, error message: %s' % (
                response.request_id, response.status_code,
                response.code, response.message
            ))

    return response_content
srt2video.py
ADDED
@@ -0,0 +1,22 @@
import subprocess
import os
def merge_video_and_subtitle(video_and_srt_path, base_name):
    video_ext = ".mp4"
    srt_ext = ".srt"

    video_path = os.path.join(video_and_srt_path, f"{base_name}_with_audio" + video_ext).replace("\\", "/")
    srt_path = os.path.join(video_and_srt_path, base_name + srt_ext).replace("\\", "/")
    output_path = os.path.join(video_and_srt_path, f"{base_name}_with_audio_with_subs" + video_ext).replace("\\", "/")

    command = [
        'ffmpeg',
        '-i', video_path,
        '-vf', f'subtitles={srt_path}',
        '-c:a', 'copy',
        output_path
    ]

    try:
        subprocess.run(command, check=True)
    except subprocess.CalledProcessError as e:
        print(f"An error occurred while merging video and subtitles: {e}")
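
A sketch of the subtitle burn-in step, assuming ffmpeg is installed and that the hypothetical files section_with_audio.mp4 and section.srt already exist in material/video:

from srt2video import merge_video_and_subtitle

# Produces material/video/section_with_audio_with_subs.mp4 via ffmpeg's subtitles filter.
merge_video_and_subtitle("material/video", "section")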
srt_generate_for_each_sentence.py
ADDED
@@ -0,0 +1,93 @@
import os
import re
from moviepy.editor import AudioFileClip
from typing import List

def format_time(seconds):
    hours, remainder = divmod(seconds, 3600)
    minutes, seconds = divmod(remainder, 60)
    milliseconds = int((seconds - int(seconds)) * 1000)
    seconds = int(seconds)
    return f"{int(hours):02d}:{int(minutes):02d}:{int(seconds):02d},{milliseconds:03d}"

# Return the duration of an audio file
def get_audio_duration(file_path):
    audio = AudioFileClip(file_path)
    duration = audio.duration
    audio.close()
    return duration

# Build a single SRT subtitle entry
def create_srt_line(index, start_time, end_time, text):
    return f"{index}\n{start_time} --> {end_time}\n{text}\n\n"

def generate_srt_from_audio(base_dir: str, output_dir: str, output_srt_file: str) -> None:
    """
    Generate an SRT subtitle file from the audio folders under the given directory.

    :param base_dir: Root directory containing the audio folders.
    :param output_dir: Directory for the output SRT file.
    :param output_srt_file: Full path of the output SRT file.
    """

    # Create the output directory if it does not exist
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # Make sure the output file name carries the .srt suffix
    if not output_srt_file.endswith('.srt'):
        output_srt_file += '.srt'

    # Initialize the current time (the video starts with 2 seconds of silence)
    current_time = 2.000

    # Open the SRT file for writing
    with open(output_srt_file, 'w', encoding='utf-8') as srt_file:
        srt_index = 1

        # Collect the matching sub-directories and sort them by index
        sub_dirs = [d for d in os.listdir(base_dir) if d.startswith('audio_for_paragraph_')]
        sub_dirs.sort(key=lambda x: int(re.search(r'\d+', x).group()))

        # Iterate over all sub-directories
        for sub_dir in sub_dirs:
            sub_dir_path = os.path.join(base_dir, sub_dir)

            # Find all .md and .mp3 files
            files = [f for f in os.listdir(sub_dir_path) if f.endswith('.md') or f.endswith('.mp3')]
            md_files = [f for f in files if f.endswith('.md')]

            # Sort the .md files by index1 and index2
            md_files.sort(key=lambda x: (int(x.split('_')[1]), int(x.split('_')[3].split('.')[0])))

            # Process every .md file
            for md_file in md_files:
                md_file_path = os.path.join(sub_dir_path, md_file)
                mp3_file_path = os.path.splitext(md_file_path)[0] + '.mp3'

                # Make sure the corresponding .mp3 file exists
                if os.path.exists(mp3_file_path):
                    # Read the .md file content
                    with open(md_file_path, 'r', encoding='utf-8') as f:
                        text = f.read().strip()

                    # Get the duration of the .mp3 file
                    duration = get_audio_duration(mp3_file_path)

                    # Build the SRT subtitle entry
                    start_time_str = format_time(current_time)
                    end_time_str = format_time(current_time + duration)
                    srt_line = create_srt_line(srt_index, start_time_str, end_time_str, text)

                    # Write it to the SRT file
                    srt_file.write(srt_line)

                    # Advance the current time, adding 0.3 s to avoid overlapping entries
                    current_time += duration + 0.3

                    srt_index += 1
                else:
                    print(f"No corresponding MP3 file found for {md_file}")

    print("SRT file generated successfully.")
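
A sketch of the subtitle-timing step, assuming the sentence-level .md/.mp3 pairs from audio_generate_each_sentence.py already exist under the hypothetical folder below:

from srt_generate_for_each_sentence import generate_srt_from_audio

# Writes material/video/section.srt; timings follow the measured mp3 durations plus 0.3 s gaps.
generate_srt_from_audio("material/audio/section", "material/video", "material/video/section")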
theme_generate.py
ADDED
@@ -0,0 +1,68 @@
from http import HTTPStatus
import dashscope
import re

def theme_generate_with_qwen_plus(input_filepath, title):
    """
    Generate a summary title with Qwen-Plus.

    The function reads the content of the given file and, based on that content and
    the given topic, generates a precise, high-level summary title.

    Parameters:
    - input_filepath: Path of the input file whose content is used to generate the title.
    - title: Topic the summary title must revolve around; the generated title stays closely related to it.

    Returns:
    - response_content: The generated summary title.

    Notes:
    - The title is requested as a stream; the response content is accumulated only when the status code is HTTPStatus.OK.
    - If an error occurs, the function prints the request's error information.
    """
    # Read the content of the input file
    with open(input_filepath, 'r', encoding='utf-8') as file:
        content = file.read()

    # Build the prompt guiding the model to generate a title closely tied to the topic
    prompt = f"""
    请为以下输入文档创建一个精确的、具备概括性的摘要标题,能够反映文档核心内容,忽略所有链接,仅聚焦文字信息。
    需要紧紧地围绕主题“{title}”。
    直接呈现标题成果,勿附加其他文本,不超过10个汉字,用中文回答。
    以下是输入文档的内容:
    """

    # Build the message payload
    messages = [{
        'role': 'user',
        'content': f"""
        "{prompt}"
        "{content}"
        """
    }]

    # Initialize the response content
    response_content = ''

    # Request the generated result as a stream
    responses = dashscope.Generation.call("qwen-plus",
                                          messages=messages,
                                          result_format='message',
                                          stream=True,
                                          incremental_output=True)

    # Iterate over the responses and accumulate the generated title
    for response in responses:
        if response.status_code == HTTPStatus.OK:
            response_content += response.output.choices[0]['message']['content']
        else:
            # Print the error information
            print('Request id: %s, Status code: %s, error code: %s, error message: %s' % (
                response.request_id, response.status_code,
                response.code, response.message
            ))

    # Strip surrounding double quotes from the title, if present
    response_content = re.sub(r'^"|"$', '', response_content)

    # Return the generated title
    return response_content
txt2srt.py
ADDED
@@ -0,0 +1,95 @@
import re
from datetime import timedelta
import math
import os

def split_text_into_sentences(text):
    # Split the text on sentence-ending punctuation or newlines
    sentences = re.split(r'[\。\?\!\,\n]', text)
    # Filter out empty strings
    sentences = [s.strip() for s in sentences if s.strip()]
    return sentences


def generate_srt_content(sentences, start_time=0):
    srt_content = []
    duration_per_four_chinese_chars = 0.7  # every four Chinese characters last 0.7 seconds
    duration_per_other_char = 0.2  # every other character lasts 0.2 seconds

    current_time = start_time
    for index, sentence in enumerate(sentences, start=1):
        chinese_char_count = len(re.findall(r'[\u4e00-\u9fa5]', sentence))

        other_char_count = len(sentence) - chinese_char_count

        # Total duration of the Chinese characters, counted in groups of four
        total_chinese_duration = math.ceil(chinese_char_count / 4) * duration_per_four_chinese_chars
        # Total duration of the non-Chinese characters
        total_other_duration = other_char_count * duration_per_other_char

        # Total duration
        total_duration = total_chinese_duration + total_other_duration

        # Cap the duration so a single subtitle never runs too long (more than a minute)
        if total_duration > 60:
            total_duration = 60

        # Total seconds at the start and end
        total_seconds_start = current_time
        total_seconds_end = min(current_time + total_duration, current_time + 60)

        # Split into seconds and milliseconds
        start_seconds = int(total_seconds_start)
        start_milliseconds = int((total_seconds_start - start_seconds) * 1000)
        end_seconds = int(total_seconds_end)
        end_milliseconds = int((total_seconds_end - end_seconds) * 1000)

        # Compute hours, minutes, seconds and milliseconds of the start time
        hours, remainder = divmod(start_seconds, 3600)  # 3600 seconds per hour
        minutes, seconds = divmod(remainder, 60)
        milliseconds = start_milliseconds

        start = '{:02d}:{:02d}:{:02d},{:03d}'.format(hours, minutes, seconds, milliseconds)

        # Compute hours, minutes, seconds and milliseconds of the end time
        hours, remainder = divmod(end_seconds, 3600)
        minutes, seconds = divmod(remainder, 60)
        milliseconds = end_milliseconds

        end = '{:02d}:{:02d}:{:02d},{:03d}'.format(hours, minutes, seconds, milliseconds)

        # Build a single SRT entry
        srt_line = f"{index}\n{start} --> {end}\n{sentence}\n"
        srt_content.append(srt_line)

        # Advance the current time for the next sentence
        current_time += total_duration

    return srt_content


def txt_to_srt(txt_file_path, output_srt_file_path):
    """
    Convert a TXT file into an SRT subtitle file; subtitles start at the 2-second mark.

    :param txt_file_path: Path of the input TXT file
    :param output_srt_file_path: Directory where the output SRT file is written
    """
    with open(txt_file_path, 'r', encoding='utf-8') as file:
        text = file.read()

    sentences = split_text_into_sentences(text)
    srt_content = generate_srt_content(sentences, start_time=2)

    srt_content_str = '\n'.join(srt_content)

    file_name = 'output_with_audio.srt'

    with open(os.path.join(output_srt_file_path, file_name), 'w', encoding='utf-8') as file:
        file.write(srt_content_str)

    print("SRT文件已生成。")
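
txt2srt.py is a standalone fallback that estimates subtitle timings from character counts rather than from the synthesized audio, and it is not imported by main.py. A minimal usage sketch with a hypothetical input file:

from txt2srt import txt_to_srt

# Writes output_with_audio.srt into the given directory; subtitles start at the 2-second mark.
txt_to_srt("input/section.txt", "material/video")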