chaowenguo committed
Commit 1e7eefa (verified) · Parent(s): 293ab53

Upload 29 files
app/__init__.py ADDED
File without changes
app/asgi.py ADDED
@@ -0,0 +1,82 @@
"""Application implementation - ASGI."""

import os

from fastapi import FastAPI, Request
from fastapi.exceptions import RequestValidationError
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse
from fastapi.staticfiles import StaticFiles
from loguru import logger

from app.config import config
from app.models.exception import HttpException
from app.router import root_api_router
from app.utils import utils


def exception_handler(request: Request, e: HttpException):
    return JSONResponse(
        status_code=e.status_code,
        content=utils.get_response(e.status_code, e.data, e.message),
    )


def validation_exception_handler(request: Request, e: RequestValidationError):
    return JSONResponse(
        status_code=400,
        content=utils.get_response(
            status=400, data=e.errors(), message="field required"
        ),
    )


def get_application() -> FastAPI:
    """Initialize FastAPI application.

    Returns:
        FastAPI: Application object instance.

    """
    instance = FastAPI(
        title=config.project_name,
        description=config.project_description,
        version=config.project_version,
        debug=False,
    )
    instance.include_router(root_api_router)
    instance.add_exception_handler(HttpException, exception_handler)
    instance.add_exception_handler(RequestValidationError, validation_exception_handler)
    return instance


app = get_application()

# Configures the CORS middleware for the FastAPI app
cors_allowed_origins_str = os.getenv("CORS_ALLOWED_ORIGINS", "")
origins = cors_allowed_origins_str.split(",") if cors_allowed_origins_str else ["*"]
app.add_middleware(
    CORSMiddleware,
    allow_origins=origins,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

task_dir = utils.task_dir()
app.mount(
    "/tasks", StaticFiles(directory=task_dir, html=True, follow_symlink=True), name=""
)

public_dir = utils.public_dir()
app.mount("/", StaticFiles(directory=public_dir, html=True), name="")


@app.on_event("shutdown")
def shutdown_event():
    logger.info("shutdown event")


@app.on_event("startup")
def startup_event():
    logger.info("startup event")
app/config/__init__.py ADDED
@@ -0,0 +1,56 @@
import os
import sys

from loguru import logger

from app.config import config
from app.utils import utils


def __init_logger():
    # _log_file = utils.storage_dir("logs/server.log")
    _lvl = config.log_level
    root_dir = os.path.dirname(
        os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
    )

    def format_record(record):
        # Get the full file path from the log record
        file_path = record["file"].path
        # Convert the absolute path to a path relative to the project root
        relative_path = os.path.relpath(file_path, root_dir)
        # Update the file path in the record
        record["file"].path = f"./{relative_path}"
        # Return the modified format string; adjust it here as needed
        _format = (
            "<green>{time:%Y-%m-%d %H:%M:%S}</> | "
            + "<level>{level}</> | "
            + '"{file.path}:{line}":<blue> {function}</> '
            + "- <level>{message}</>"
            + "\n"
        )
        return _format

    logger.remove()

    logger.add(
        sys.stdout,
        level=_lvl,
        format=format_record,
        colorize=True,
    )

    # logger.add(
    #     _log_file,
    #     level=_lvl,
    #     format=format_record,
    #     rotation="00:00",
    #     retention="3 days",
    #     backtrace=True,
    #     diagnose=True,
    #     enqueue=True,
    # )


__init_logger()
app/config/config.py ADDED
@@ -0,0 +1,78 @@
import os
import shutil
import socket

import toml
from loguru import logger

root_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))
config_file = f"{root_dir}/config.toml"


def load_config():
    # fix: IsADirectoryError: [Errno 21] Is a directory: '/MoneyPrinterTurbo/config.toml'
    if os.path.isdir(config_file):
        shutil.rmtree(config_file)

    if not os.path.isfile(config_file):
        example_file = f"{root_dir}/config.example.toml"
        if os.path.isfile(example_file):
            shutil.copyfile(example_file, config_file)
            logger.info("copy config.example.toml to config.toml")

    logger.info(f"load config from file: {config_file}")

    try:
        _config_ = toml.load(config_file)
    except Exception as e:
        logger.warning(f"load config failed: {str(e)}, try to load as utf-8-sig")
        with open(config_file, mode="r", encoding="utf-8-sig") as fp:
            _cfg_content = fp.read()
            _config_ = toml.loads(_cfg_content)
    return _config_


def save_config():
    with open(config_file, "w", encoding="utf-8") as f:
        _cfg["app"] = app
        _cfg["azure"] = azure
        _cfg["siliconflow"] = siliconflow
        _cfg["ui"] = ui
        f.write(toml.dumps(_cfg))


_cfg = load_config()
app = _cfg.get("app", {})
whisper = _cfg.get("whisper", {})
proxy = _cfg.get("proxy", {})
azure = _cfg.get("azure", {})
siliconflow = _cfg.get("siliconflow", {})
ui = _cfg.get(
    "ui",
    {
        "hide_log": False,
    },
)

hostname = socket.gethostname()

log_level = _cfg.get("log_level", "DEBUG")
listen_host = _cfg.get("listen_host", "0.0.0.0")
listen_port = _cfg.get("listen_port", 8080)
project_name = _cfg.get("project_name", "MoneyPrinterTurbo")
project_description = _cfg.get(
    "project_description",
    "<a href='https://github.com/harry0703/MoneyPrinterTurbo'>https://github.com/harry0703/MoneyPrinterTurbo</a>",
)
project_version = _cfg.get("project_version", "1.2.6")
reload_debug = False

imagemagick_path = app.get("imagemagick_path", "")
if imagemagick_path and os.path.isfile(imagemagick_path):
    os.environ["IMAGEMAGICK_BINARY"] = imagemagick_path

ffmpeg_path = app.get("ffmpeg_path", "")
if ffmpeg_path and os.path.isfile(ffmpeg_path):
    os.environ["IMAGEIO_FFMPEG_EXE"] = ffmpeg_path

logger.info(f"{project_name} v{project_version}")
app/controllers/base.py ADDED
@@ -0,0 +1,31 @@
from uuid import uuid4

from fastapi import Request

from app.config import config
from app.models.exception import HttpException


def get_task_id(request: Request):
    task_id = request.headers.get("x-task-id")
    if not task_id:
        task_id = uuid4()
    return str(task_id)


def get_api_key(request: Request):
    api_key = request.headers.get("x-api-key")
    return api_key


def verify_token(request: Request):
    token = get_api_key(request)
    if token != config.app.get("api_key", ""):
        request_id = get_task_id(request)
        request_url = request.url
        user_agent = request.headers.get("user-agent")
        raise HttpException(
            task_id=request_id,
            status_code=401,
            message=f"invalid token: {request_url}, {user_agent}",
        )
app/controllers/manager/base_manager.py ADDED
@@ -0,0 +1,64 @@
import threading
from typing import Any, Callable, Dict


class TaskManager:
    def __init__(self, max_concurrent_tasks: int):
        self.max_concurrent_tasks = max_concurrent_tasks
        self.current_tasks = 0
        self.lock = threading.Lock()
        self.queue = self.create_queue()

    def create_queue(self):
        raise NotImplementedError()

    def add_task(self, func: Callable, *args: Any, **kwargs: Any):
        with self.lock:
            if self.current_tasks < self.max_concurrent_tasks:
                print(f"add task: {func.__name__}, current_tasks: {self.current_tasks}")
                self.execute_task(func, *args, **kwargs)
            else:
                print(
                    f"enqueue task: {func.__name__}, current_tasks: {self.current_tasks}"
                )
                self.enqueue({"func": func, "args": args, "kwargs": kwargs})

    def execute_task(self, func: Callable, *args: Any, **kwargs: Any):
        thread = threading.Thread(
            target=self.run_task, args=(func, *args), kwargs=kwargs
        )
        thread.start()

    def run_task(self, func: Callable, *args: Any, **kwargs: Any):
        try:
            with self.lock:
                self.current_tasks += 1
            func(*args, **kwargs)  # call the function here, passing *args and **kwargs.
        finally:
            self.task_done()

    def check_queue(self):
        with self.lock:
            if (
                self.current_tasks < self.max_concurrent_tasks
                and not self.is_queue_empty()
            ):
                task_info = self.dequeue()
                func = task_info["func"]
                args = task_info.get("args", ())
                kwargs = task_info.get("kwargs", {})
                self.execute_task(func, *args, **kwargs)

    def task_done(self):
        with self.lock:
            self.current_tasks -= 1
        self.check_queue()

    def enqueue(self, task: Dict):
        raise NotImplementedError()

    def dequeue(self):
        raise NotImplementedError()

    def is_queue_empty(self):
        raise NotImplementedError()
app/controllers/manager/memory_manager.py ADDED
@@ -0,0 +1,18 @@
from queue import Queue
from typing import Dict

from app.controllers.manager.base_manager import TaskManager


class InMemoryTaskManager(TaskManager):
    def create_queue(self):
        return Queue()

    def enqueue(self, task: Dict):
        self.queue.put(task)

    def dequeue(self):
        return self.queue.get()

    def is_queue_empty(self):
        return self.queue.empty()
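A minimal sketch of the queueing behavior (the `work` function and its sleep are made up for illustration): the first `max_concurrent_tasks` calls start threads immediately, the rest are parked in the `Queue` and drained by `check_queue()` as running tasks call `task_done()`.

# Sketch: InMemoryTaskManager concurrency limit (illustrative only).
import time

from app.controllers.manager.memory_manager import InMemoryTaskManager


def work(task_id: str):
    time.sleep(1)  # stand-in for real work
    print(f"done: {task_id}")


manager = InMemoryTaskManager(max_concurrent_tasks=2)
for n in range(5):
    manager.add_task(work, task_id=str(n))  # tasks 2-4 wait in the queue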
app/controllers/manager/redis_manager.py ADDED
@@ -0,0 +1,56 @@
import json
from typing import Dict

import redis

from app.controllers.manager.base_manager import TaskManager
from app.models.schema import VideoParams
from app.services import task as tm

FUNC_MAP = {
    "start": tm.start,
    # 'start_test': tm.start_test
}


class RedisTaskManager(TaskManager):
    def __init__(self, max_concurrent_tasks: int, redis_url: str):
        self.redis_client = redis.Redis.from_url(redis_url)
        super().__init__(max_concurrent_tasks)

    def create_queue(self):
        return "task_queue"

    def enqueue(self, task: Dict):
        task_with_serializable_params = task.copy()

        if "params" in task["kwargs"] and isinstance(
            task["kwargs"]["params"], VideoParams
        ):
            task_with_serializable_params["kwargs"]["params"] = task["kwargs"][
                "params"
            ].dict()

        # Convert the function object to its name
        task_with_serializable_params["func"] = task["func"].__name__
        self.redis_client.rpush(self.queue, json.dumps(task_with_serializable_params))

    def dequeue(self):
        task_json = self.redis_client.lpop(self.queue)
        if task_json:
            task_info = json.loads(task_json)
            # Convert the function name back to a function object
            task_info["func"] = FUNC_MAP[task_info["func"]]

            if "params" in task_info["kwargs"] and isinstance(
                task_info["kwargs"]["params"], dict
            ):
                task_info["kwargs"]["params"] = VideoParams(
                    **task_info["kwargs"]["params"]
                )

            return task_info
        return None

    def is_queue_empty(self):
        return self.redis_client.llen(self.queue) == 0
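The enqueue/dequeue pair above round-trips tasks through Redis: the function is stored by name (recovered via FUNC_MAP) and VideoParams is stored as a dict. A sketch of how it is wired up, mirroring app/controllers/v1/video.py; the URL credentials are placeholders:

# Sketch: RedisTaskManager wiring (placeholder redis_url).
from app.controllers.manager.redis_manager import RedisTaskManager
from app.models.schema import VideoParams
from app.services import task as tm

manager = RedisTaskManager(
    max_concurrent_tasks=1,
    redis_url="redis://:password@localhost:6379/0",
)
params = VideoParams(video_subject="spring flowers")
# Serialized on enqueue, rebuilt as VideoParams on dequeue.
manager.add_task(tm.start, task_id="demo", params=params, stop_at="video")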
app/controllers/ping.py ADDED
@@ -0,0 +1,13 @@
from fastapi import APIRouter, Request

router = APIRouter()


@router.get(
    "/ping",
    tags=["Health Check"],
    description="Check service availability",
    response_description="pong",
)
def ping(request: Request) -> str:
    return "pong"
app/controllers/v1/base.py ADDED
@@ -0,0 +1,11 @@
from fastapi import APIRouter


def new_router(dependencies=None):
    router = APIRouter()
    router.tags = ["V1"]
    router.prefix = "/api/v1"
    # Apply the auth dependencies to all routes
    if dependencies:
        router.dependencies = dependencies
    return router
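The `dependencies` hook is how the commented-out authentication line in llm.py and video.py is meant to be enabled; a sketch:

# Sketch: enable the x-api-key check from app/controllers/base.py on all v1 routes.
from fastapi import Depends

from app.controllers import base
from app.controllers.v1.base import new_router

router = new_router(dependencies=[Depends(base.verify_token)])
# Clients must then send the key configured as app.api_key in config.toml
# in an "x-api-key" header, or verify_token raises a 401 HttpException.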
app/controllers/v1/llm.py ADDED
@@ -0,0 +1,45 @@
from fastapi import Request

from app.controllers.v1.base import new_router
from app.models.schema import (
    VideoScriptRequest,
    VideoScriptResponse,
    VideoTermsRequest,
    VideoTermsResponse,
)
from app.services import llm
from app.utils import utils

# authentication dependency
# router = new_router(dependencies=[Depends(base.verify_token)])
router = new_router()


@router.post(
    "/scripts",
    response_model=VideoScriptResponse,
    summary="Create a script for the video",
)
def generate_video_script(request: Request, body: VideoScriptRequest):
    video_script = llm.generate_script(
        video_subject=body.video_subject,
        language=body.video_language,
        paragraph_number=body.paragraph_number,
    )
    response = {"video_script": video_script}
    return utils.get_response(200, response)


@router.post(
    "/terms",
    response_model=VideoTermsResponse,
    summary="Generate video terms based on the video script",
)
def generate_video_terms(request: Request, body: VideoTermsRequest):
    video_terms = llm.generate_terms(
        video_subject=body.video_subject,
        video_script=body.video_script,
        amount=body.amount,
    )
    response = {"video_terms": video_terms}
    return utils.get_response(200, response)
app/controllers/v1/video.py ADDED
@@ -0,0 +1,287 @@
import glob
import os
import pathlib
import shutil
from typing import Union

from fastapi import BackgroundTasks, Depends, Path, Query, Request, UploadFile
from fastapi.params import File
from fastapi.responses import FileResponse, StreamingResponse
from loguru import logger

from app.config import config
from app.controllers import base
from app.controllers.manager.memory_manager import InMemoryTaskManager
from app.controllers.manager.redis_manager import RedisTaskManager
from app.controllers.v1.base import new_router
from app.models.exception import HttpException
from app.models.schema import (
    AudioRequest,
    BgmRetrieveResponse,
    BgmUploadResponse,
    SubtitleRequest,
    TaskDeletionResponse,
    TaskQueryRequest,
    TaskQueryResponse,
    TaskResponse,
    TaskVideoRequest,
)
from app.services import state as sm
from app.services import task as tm
from app.utils import utils

# authentication dependency
# router = new_router(dependencies=[Depends(base.verify_token)])
router = new_router()

_enable_redis = config.app.get("enable_redis", False)
_redis_host = config.app.get("redis_host", "localhost")
_redis_port = config.app.get("redis_port", 6379)
_redis_db = config.app.get("redis_db", 0)
_redis_password = config.app.get("redis_password", None)
_max_concurrent_tasks = config.app.get("max_concurrent_tasks", 5)

redis_url = f"redis://:{_redis_password}@{_redis_host}:{_redis_port}/{_redis_db}"
# Choose the appropriate task manager based on the configuration
if _enable_redis:
    task_manager = RedisTaskManager(
        max_concurrent_tasks=_max_concurrent_tasks, redis_url=redis_url
    )
else:
    task_manager = InMemoryTaskManager(max_concurrent_tasks=_max_concurrent_tasks)


@router.post("/videos", response_model=TaskResponse, summary="Generate a short video")
def create_video(
    background_tasks: BackgroundTasks, request: Request, body: TaskVideoRequest
):
    return create_task(request, body, stop_at="video")


@router.post("/subtitle", response_model=TaskResponse, summary="Generate subtitle only")
def create_subtitle(
    background_tasks: BackgroundTasks, request: Request, body: SubtitleRequest
):
    return create_task(request, body, stop_at="subtitle")


@router.post("/audio", response_model=TaskResponse, summary="Generate audio only")
def create_audio(
    background_tasks: BackgroundTasks, request: Request, body: AudioRequest
):
    return create_task(request, body, stop_at="audio")


def create_task(
    request: Request,
    body: Union[TaskVideoRequest, SubtitleRequest, AudioRequest],
    stop_at: str,
):
    task_id = utils.get_uuid()
    request_id = base.get_task_id(request)
    try:
        task = {
            "task_id": task_id,
            "request_id": request_id,
            "params": body.model_dump(),
        }
        sm.state.update_task(task_id)
        task_manager.add_task(tm.start, task_id=task_id, params=body, stop_at=stop_at)
        logger.success(f"Task created: {utils.to_json(task)}")
        return utils.get_response(200, task)
    except ValueError as e:
        raise HttpException(
            task_id=task_id, status_code=400, message=f"{request_id}: {str(e)}"
        )


@router.get("/tasks", response_model=TaskQueryResponse, summary="Get all tasks")
def get_all_tasks(
    request: Request, page: int = Query(1, ge=1), page_size: int = Query(10, ge=1)
):
    request_id = base.get_task_id(request)
    tasks, total = sm.state.get_all_tasks(page, page_size)

    response = {
        "tasks": tasks,
        "total": total,
        "page": page,
        "page_size": page_size,
    }
    return utils.get_response(200, response)


@router.get(
    "/tasks/{task_id}", response_model=TaskQueryResponse, summary="Query task status"
)
def get_task(
    request: Request,
    task_id: str = Path(..., description="Task ID"),
    query: TaskQueryRequest = Depends(),
):
    endpoint = config.app.get("endpoint", "")
    if not endpoint:
        endpoint = str(request.base_url)
    endpoint = endpoint.rstrip("/")

    request_id = base.get_task_id(request)
    task = sm.state.get_task(task_id)
    if task:
        task_dir = utils.task_dir()

        def file_to_uri(file):
            if not file.startswith(endpoint):
                _uri_path = file.replace(task_dir, "tasks").replace("\\", "/")
                _uri_path = f"{endpoint}/{_uri_path}"
            else:
                _uri_path = file
            return _uri_path

        if "videos" in task:
            videos = task["videos"]
            urls = []
            for v in videos:
                urls.append(file_to_uri(v))
            task["videos"] = urls
        if "combined_videos" in task:
            combined_videos = task["combined_videos"]
            urls = []
            for v in combined_videos:
                urls.append(file_to_uri(v))
            task["combined_videos"] = urls
        return utils.get_response(200, task)

    raise HttpException(
        task_id=task_id, status_code=404, message=f"{request_id}: task not found"
    )


@router.delete(
    "/tasks/{task_id}",
    response_model=TaskDeletionResponse,
    summary="Delete a generated short video task",
)
def delete_video(request: Request, task_id: str = Path(..., description="Task ID")):
    request_id = base.get_task_id(request)
    task = sm.state.get_task(task_id)
    if task:
        tasks_dir = utils.task_dir()
        current_task_dir = os.path.join(tasks_dir, task_id)
        if os.path.exists(current_task_dir):
            shutil.rmtree(current_task_dir)

        sm.state.delete_task(task_id)
        logger.success(f"video deleted: {utils.to_json(task)}")
        return utils.get_response(200)

    raise HttpException(
        task_id=task_id, status_code=404, message=f"{request_id}: task not found"
    )


@router.get(
    "/musics", response_model=BgmRetrieveResponse, summary="Retrieve local BGM files"
)
def get_bgm_list(request: Request):
    suffix = "*.mp3"
    song_dir = utils.song_dir()
    files = glob.glob(os.path.join(song_dir, suffix))
    bgm_list = []
    for file in files:
        bgm_list.append(
            {
                "name": os.path.basename(file),
                "size": os.path.getsize(file),
                "file": file,
            }
        )
    response = {"files": bgm_list}
    return utils.get_response(200, response)


@router.post(
    "/musics",
    response_model=BgmUploadResponse,
    summary="Upload the BGM file to the songs directory",
)
def upload_bgm_file(request: Request, file: UploadFile = File(...)):
    request_id = base.get_task_id(request)
    # check file ext
    if file.filename.endswith("mp3"):
        song_dir = utils.song_dir()
        save_path = os.path.join(song_dir, file.filename)
        # save file
        with open(save_path, "wb+") as buffer:
            # If the file already exists, it will be overwritten
            file.file.seek(0)
            buffer.write(file.file.read())
        response = {"file": save_path}
        return utils.get_response(200, response)

    raise HttpException(
        "", status_code=400, message=f"{request_id}: Only *.mp3 files can be uploaded"
    )


@router.get("/stream/{file_path:path}")
async def stream_video(request: Request, file_path: str):
    tasks_dir = utils.task_dir()
    video_path = os.path.join(tasks_dir, file_path)
    range_header = request.headers.get("Range")
    video_size = os.path.getsize(video_path)
    start, end = 0, video_size - 1

    length = video_size
    if range_header:
        range_ = range_header.split("bytes=")[1]
        start, end = [int(part) if part else None for part in range_.split("-")]
        if start is None:
            start = video_size - end
            end = video_size - 1
        if end is None:
            end = video_size - 1
        length = end - start + 1

    def file_iterator(file_path, offset=0, bytes_to_read=None):
        with open(file_path, "rb") as f:
            f.seek(offset, os.SEEK_SET)
            remaining = bytes_to_read or video_size
            while remaining > 0:
                bytes_to_read = min(4096, remaining)
                data = f.read(bytes_to_read)
                if not data:
                    break
                remaining -= len(data)
                yield data

    response = StreamingResponse(
        file_iterator(video_path, start, length), media_type="video/mp4"
    )
    response.headers["Content-Range"] = f"bytes {start}-{end}/{video_size}"
    response.headers["Accept-Ranges"] = "bytes"
    response.headers["Content-Length"] = str(length)
    response.status_code = 206  # Partial Content

    return response


@router.get("/download/{file_path:path}")
async def download_video(_: Request, file_path: str):
    """
    download video
    :param _: Request request
    :param file_path: video file path, eg: /cd1727ed-3473-42a2-a7da-4faafafec72b/final-1.mp4
    :return: video file
    """
    tasks_dir = utils.task_dir()
    video_path = os.path.join(tasks_dir, file_path)
    file_path = pathlib.Path(video_path)
    filename = file_path.stem
    extension = file_path.suffix
    headers = {"Content-Disposition": f"attachment; filename={filename}{extension}"}
    return FileResponse(
        path=video_path,
        headers=headers,
        filename=f"{filename}{extension}",
        media_type=f"video/{extension[1:]}",
    )
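For reference, the /stream endpoint above honors HTTP Range headers and always answers 206 Partial Content. A sketch of a client exercising it (the task id and file name are hypothetical):

# Sketch: request the first KiB of a rendered video via the Range support above.
import requests

r = requests.get(
    "http://localhost:8080/api/v1/stream/demo-task/final-1.mp4",
    headers={"Range": "bytes=0-1023"},
)
print(r.status_code)  # 206
print(r.headers["Content-Range"])  # e.g. "bytes 0-1023/<video size>"
print(len(r.content))  # 1024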
app/models/__init__.py ADDED
File without changes
app/models/const.py ADDED
@@ -0,0 +1,25 @@
PUNCTUATIONS = [
    "?",
    ",",
    ".",
    "、",
    ";",
    ":",
    "!",
    "…",
    "?",
    ",",
    "。",
    "、",
    ";",
    ":",
    "!",
    "...",
]

TASK_STATE_FAILED = -1
TASK_STATE_COMPLETE = 1
TASK_STATE_PROCESSING = 4

FILE_TYPE_VIDEOS = ["mp4", "mov", "mkv", "webm"]
FILE_TYPE_IMAGES = ["jpg", "jpeg", "png", "bmp"]
app/models/exception.py ADDED
@@ -0,0 +1,28 @@
import traceback
from typing import Any

from loguru import logger


class HttpException(Exception):
    def __init__(
        self, task_id: str, status_code: int, message: str = "", data: Any = None
    ):
        self.message = message
        self.status_code = status_code
        self.data = data
        # Retrieve the exception stack trace information.
        tb_str = traceback.format_exc().strip()
        if not tb_str or tb_str == "NoneType: None":
            msg = f"HttpException: {status_code}, {task_id}, {message}"
        else:
            msg = f"HttpException: {status_code}, {task_id}, {message}\n{tb_str}"

        if status_code == 400:
            logger.warning(msg)
        else:
            logger.error(msg)


class FileNotFoundException(Exception):
    pass
app/models/schema.py ADDED
@@ -0,0 +1,303 @@
import warnings
from enum import Enum
from typing import Any, List, Optional, Union

import pydantic
from pydantic import BaseModel

# Ignore specific Pydantic warnings
warnings.filterwarnings(
    "ignore",
    category=UserWarning,
    message="Field name.*shadows an attribute in parent.*",
)


class VideoConcatMode(str, Enum):
    random = "random"
    sequential = "sequential"


class VideoTransitionMode(str, Enum):
    none = None
    shuffle = "Shuffle"
    fade_in = "FadeIn"
    fade_out = "FadeOut"
    slide_in = "SlideIn"
    slide_out = "SlideOut"


class VideoAspect(str, Enum):
    landscape = "16:9"
    portrait = "9:16"
    square = "1:1"

    def to_resolution(self):
        if self == VideoAspect.landscape.value:
            return 1920, 1080
        elif self == VideoAspect.portrait.value:
            return 1080, 1920
        elif self == VideoAspect.square.value:
            return 1080, 1080
        return 1080, 1920


class _Config:
    arbitrary_types_allowed = True


@pydantic.dataclasses.dataclass(config=_Config)
class MaterialInfo:
    provider: str = "pexels"
    url: str = ""
    duration: int = 0


class VideoParams(BaseModel):
    """
    {
        "video_subject": "",
        "video_aspect": "横屏 16:9(西瓜视频)",
        "voice_name": "女生-晓晓",
        "bgm_name": "random",
        "font_name": "STHeitiMedium 黑体-中",
        "text_color": "#FFFFFF",
        "font_size": 60,
        "stroke_color": "#000000",
        "stroke_width": 1.5
    }
    """

    video_subject: str
    video_script: str = ""  # Script used to generate the video
    video_terms: Optional[str | list] = None  # Keywords used to generate the video
    video_aspect: Optional[VideoAspect] = VideoAspect.portrait.value
    video_concat_mode: Optional[VideoConcatMode] = VideoConcatMode.random.value
    video_transition_mode: Optional[VideoTransitionMode] = None
    video_clip_duration: Optional[int] = 5
    video_count: Optional[int] = 1

    video_source: Optional[str] = "pexels"
    video_materials: Optional[List[MaterialInfo]] = (
        None  # Materials used to generate the video
    )

    video_language: Optional[str] = ""  # auto detect

    voice_name: Optional[str] = ""
    voice_volume: Optional[float] = 1.0
    voice_rate: Optional[float] = 1.0
    bgm_type: Optional[str] = "random"
    bgm_file: Optional[str] = ""
    bgm_volume: Optional[float] = 0.2

    subtitle_enabled: Optional[bool] = True
    subtitle_position: Optional[str] = "bottom"  # top, bottom, center
    custom_position: float = 70.0
    font_name: Optional[str] = "STHeitiMedium.ttc"
    text_fore_color: Optional[str] = "#FFFFFF"
    text_background_color: Union[bool, str] = True

    font_size: int = 60
    stroke_color: Optional[str] = "#000000"
    stroke_width: float = 1.5
    n_threads: Optional[int] = 2
    paragraph_number: Optional[int] = 1


class SubtitleRequest(BaseModel):
    video_script: str
    video_language: Optional[str] = ""
    voice_name: Optional[str] = "zh-CN-XiaoxiaoNeural-Female"
    voice_volume: Optional[float] = 1.0
    voice_rate: Optional[float] = 1.2
    bgm_type: Optional[str] = "random"
    bgm_file: Optional[str] = ""
    bgm_volume: Optional[float] = 0.2
    subtitle_position: Optional[str] = "bottom"
    font_name: Optional[str] = "STHeitiMedium.ttc"
    text_fore_color: Optional[str] = "#FFFFFF"
    text_background_color: Union[bool, str] = True
    font_size: int = 60
    stroke_color: Optional[str] = "#000000"
    stroke_width: float = 1.5
    video_source: Optional[str] = "local"
    subtitle_enabled: Optional[str] = "true"


class AudioRequest(BaseModel):
    video_script: str
    video_language: Optional[str] = ""
    voice_name: Optional[str] = "zh-CN-XiaoxiaoNeural-Female"
    voice_volume: Optional[float] = 1.0
    voice_rate: Optional[float] = 1.2
    bgm_type: Optional[str] = "random"
    bgm_file: Optional[str] = ""
    bgm_volume: Optional[float] = 0.2
    video_source: Optional[str] = "local"


class VideoScriptParams:
    """
    {
        "video_subject": "春天的花海",
        "video_language": "",
        "paragraph_number": 1
    }
    """

    video_subject: Optional[str] = "春天的花海"
    video_language: Optional[str] = ""
    paragraph_number: Optional[int] = 1


class VideoTermsParams:
    """
    {
        "video_subject": "",
        "video_script": "",
        "amount": 5
    }
    """

    video_subject: Optional[str] = "春天的花海"
    video_script: Optional[str] = (
        "春天的花海,如诗如画般展现在眼前。万物复苏的季节里,大地披上了一袭绚丽多彩的盛装。金黄的迎春、粉嫩的樱花、洁白的梨花、艳丽的郁金香……"
    )
    amount: Optional[int] = 5


class BaseResponse(BaseModel):
    status: int = 200
    message: Optional[str] = "success"
    data: Any = None


class TaskVideoRequest(VideoParams, BaseModel):
    pass


class TaskQueryRequest(BaseModel):
    pass


class VideoScriptRequest(VideoScriptParams, BaseModel):
    pass


class VideoTermsRequest(VideoTermsParams, BaseModel):
    pass


######################################################################################################
######################################################################################################
######################################################################################################
######################################################################################################
class TaskResponse(BaseResponse):
    class TaskResponseData(BaseModel):
        task_id: str

    data: TaskResponseData

    class Config:
        json_schema_extra = {
            "example": {
                "status": 200,
                "message": "success",
                "data": {"task_id": "6c85c8cc-a77a-42b9-bc30-947815aa0558"},
            },
        }


class TaskQueryResponse(BaseResponse):
    class Config:
        json_schema_extra = {
            "example": {
                "status": 200,
                "message": "success",
                "data": {
                    "state": 1,
                    "progress": 100,
                    "videos": [
                        "http://127.0.0.1:8080/tasks/6c85c8cc-a77a-42b9-bc30-947815aa0558/final-1.mp4"
                    ],
                    "combined_videos": [
                        "http://127.0.0.1:8080/tasks/6c85c8cc-a77a-42b9-bc30-947815aa0558/combined-1.mp4"
                    ],
                },
            },
        }


class TaskDeletionResponse(BaseResponse):
    class Config:
        json_schema_extra = {
            "example": {
                "status": 200,
                "message": "success",
                "data": {
                    "state": 1,
                    "progress": 100,
                    "videos": [
                        "http://127.0.0.1:8080/tasks/6c85c8cc-a77a-42b9-bc30-947815aa0558/final-1.mp4"
                    ],
                    "combined_videos": [
                        "http://127.0.0.1:8080/tasks/6c85c8cc-a77a-42b9-bc30-947815aa0558/combined-1.mp4"
                    ],
                },
            },
        }


class VideoScriptResponse(BaseResponse):
    class Config:
        json_schema_extra = {
            "example": {
                "status": 200,
                "message": "success",
                "data": {
                    "video_script": "春天的花海,是大自然的一幅美丽画卷。在这个季节里,大地复苏,万物生长,花朵争相绽放,形成了一片五彩斑斓的花海..."
                },
            },
        }


class VideoTermsResponse(BaseResponse):
    class Config:
        json_schema_extra = {
            "example": {
                "status": 200,
                "message": "success",
                "data": {"video_terms": ["sky", "tree"]},
            },
        }


class BgmRetrieveResponse(BaseResponse):
    class Config:
        json_schema_extra = {
            "example": {
                "status": 200,
                "message": "success",
                "data": {
                    "files": [
                        {
                            "name": "output013.mp3",
                            "size": 1891269,
                            "file": "/MoneyPrinterTurbo/resource/songs/output013.mp3",
                        }
                    ]
                },
            },
        }


class BgmUploadResponse(BaseResponse):
    class Config:
        json_schema_extra = {
            "example": {
                "status": 200,
                "message": "success",
                "data": {"file": "/MoneyPrinterTurbo/resource/songs/example.mp3"},
            },
        }
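Since TaskVideoRequest inherits every field from VideoParams, only `video_subject` is required when calling POST /api/v1/videos. A sketch of building the body in Python:

# Sketch: a minimal request body for POST /api/v1/videos.
from app.models.schema import TaskVideoRequest, VideoAspect

body = TaskVideoRequest(
    video_subject="spring flowers",
    video_aspect=VideoAspect.portrait,
    video_count=1,
)
print(body.model_dump())  # the dict create_task() stores for the task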
app/router.py ADDED
@@ -0,0 +1,17 @@
"""Application configuration - root APIRouter.

Defines all FastAPI application endpoints.

Resources:
    1. https://fastapi.tiangolo.com/tutorial/bigger-applications

"""

from fastapi import APIRouter

from app.controllers.v1 import llm, video

root_api_router = APIRouter()
# v1
root_api_router.include_router(video.router)
root_api_router.include_router(llm.router)
app/services/__init__.py ADDED
File without changes
app/services/llm.py ADDED
@@ -0,0 +1,445 @@
import json
import logging
import os
import re
from typing import List

import g4f
import requests
from loguru import logger
from openai import AzureOpenAI, OpenAI
from openai.types.chat import ChatCompletion

from app.config import config

_max_retries = 5


def _generate_response(prompt: str) -> str:
    try:
        content = ""
        llm_provider = "cloudflare"
        logger.info(f"llm provider: {llm_provider}")
        if llm_provider == "g4f":
            model_name = config.app.get("g4f_model_name", "")
            if not model_name:
                model_name = "gpt-3.5-turbo-16k-0613"
            content = g4f.ChatCompletion.create(
                model=model_name,
                messages=[{"role": "user", "content": prompt}],
            )
        else:
            api_version = ""  # for azure
            if llm_provider == "moonshot":
                api_key = config.app.get("moonshot_api_key")
                model_name = config.app.get("moonshot_model_name")
                base_url = "https://api.moonshot.cn/v1"
            elif llm_provider == "ollama":
                # api_key = config.app.get("openai_api_key")
                api_key = "ollama"  # any string works but you are required to have one
                model_name = config.app.get("ollama_model_name")
                base_url = config.app.get("ollama_base_url", "")
                if not base_url:
                    base_url = "http://localhost:11434/v1"
            elif llm_provider == "openai":
                api_key = config.app.get("openai_api_key")
                model_name = config.app.get("openai_model_name")
                base_url = config.app.get("openai_base_url", "")
                if not base_url:
                    base_url = "https://api.openai.com/v1"
            elif llm_provider == "oneapi":
                api_key = config.app.get("oneapi_api_key")
                model_name = config.app.get("oneapi_model_name")
                base_url = config.app.get("oneapi_base_url", "")
            elif llm_provider == "azure":
                api_key = config.app.get("azure_api_key")
                model_name = config.app.get("azure_model_name")
                base_url = config.app.get("azure_base_url", "")
                api_version = config.app.get("azure_api_version", "2024-02-15-preview")
            elif llm_provider == "gemini":
                api_key = config.app.get("gemini_api_key")
                model_name = config.app.get("gemini_model_name")
                base_url = "***"
            elif llm_provider == "qwen":
                api_key = config.app.get("qwen_api_key")
                model_name = config.app.get("qwen_model_name")
                base_url = "***"
            elif llm_provider == "cloudflare":
                api_key = config.app.get("cloudflare_api_key")
                model_name = config.app.get("cloudflare_model_name")
                account_id = config.app.get("cloudflare_account_id")
                base_url = "***"
            elif llm_provider == "deepseek":
                api_key = config.app.get("deepseek_api_key")
                model_name = config.app.get("deepseek_model_name")
                base_url = config.app.get("deepseek_base_url")
                if not base_url:
                    base_url = "https://api.deepseek.com"
            elif llm_provider == "ernie":
                api_key = config.app.get("ernie_api_key")
                secret_key = config.app.get("ernie_secret_key")
                base_url = config.app.get("ernie_base_url")
                model_name = "***"
                if not secret_key:
                    raise ValueError(
                        f"{llm_provider}: secret_key is not set, please set it in the config.toml file."
                    )
            elif llm_provider == "pollinations":
                try:
                    base_url = config.app.get("pollinations_base_url", "")
                    if not base_url:
                        base_url = "https://text.pollinations.ai/openai"
                    model_name = config.app.get("pollinations_model_name", "openai-fast")

                    # Prepare the payload
                    payload = {
                        "model": model_name,
                        "messages": [{"role": "user", "content": prompt}],
                        "seed": 101,  # Optional but helps with reproducibility
                    }

                    # Optional parameters if configured
                    if config.app.get("pollinations_private"):
                        payload["private"] = True
                    if config.app.get("pollinations_referrer"):
                        payload["referrer"] = config.app.get("pollinations_referrer")

                    headers = {"Content-Type": "application/json"}

                    # Make the API request
                    response = requests.post(base_url, headers=headers, json=payload)
                    response.raise_for_status()
                    result = response.json()

                    if result and "choices" in result and len(result["choices"]) > 0:
                        content = result["choices"][0]["message"]["content"]
                        return content.replace("\n", "")
                    else:
                        raise Exception(
                            f"[{llm_provider}] returned an invalid response format"
                        )

                except requests.exceptions.RequestException as e:
                    raise Exception(f"[{llm_provider}] request failed: {str(e)}")
                except Exception as e:
                    raise Exception(f"[{llm_provider}] error: {str(e)}")

            # Skip validation for providers that don't require an API key
            if llm_provider not in ["pollinations", "ollama"]:
                if not api_key:
                    raise ValueError(
                        f"{llm_provider}: api_key is not set, please set it in the config.toml file."
                    )
                if not model_name:
                    raise ValueError(
                        f"{llm_provider}: model_name is not set, please set it in the config.toml file."
                    )
                if not base_url:
                    raise ValueError(
                        f"{llm_provider}: base_url is not set, please set it in the config.toml file."
                    )

            if llm_provider == "qwen":
                import dashscope
                from dashscope.api_entities.dashscope_response import GenerationResponse

                dashscope.api_key = api_key
                response = dashscope.Generation.call(
                    model=model_name, messages=[{"role": "user", "content": prompt}]
                )
                if response:
                    if isinstance(response, GenerationResponse):
                        status_code = response.status_code
                        if status_code != 200:
                            raise Exception(
                                f'[{llm_provider}] returned an error response: "{response}"'
                            )

                        content = response["output"]["text"]
                        return content.replace("\n", "")
                    else:
                        raise Exception(
                            f'[{llm_provider}] returned an invalid response: "{response}"'
                        )
                else:
                    raise Exception(f"[{llm_provider}] returned an empty response")

            if llm_provider == "gemini":
                import google.generativeai as genai

                genai.configure(api_key=api_key, transport="rest")

                generation_config = {
                    "temperature": 0.5,
                    "top_p": 1,
                    "top_k": 1,
                    "max_output_tokens": 2048,
                }

                safety_settings = [
                    {
                        "category": "HARM_CATEGORY_HARASSMENT",
                        "threshold": "BLOCK_ONLY_HIGH",
                    },
                    {
                        "category": "HARM_CATEGORY_HATE_SPEECH",
                        "threshold": "BLOCK_ONLY_HIGH",
                    },
                    {
                        "category": "HARM_CATEGORY_SEXUALLY_EXPLICIT",
                        "threshold": "BLOCK_ONLY_HIGH",
                    },
                    {
                        "category": "HARM_CATEGORY_DANGEROUS_CONTENT",
                        "threshold": "BLOCK_ONLY_HIGH",
                    },
                ]

                model = genai.GenerativeModel(
                    model_name=model_name,
                    generation_config=generation_config,
                    safety_settings=safety_settings,
                )

                try:
                    response = model.generate_content(prompt)
                    candidates = response.candidates
                    generated_text = candidates[0].content.parts[0].text
                except (AttributeError, IndexError) as e:
                    print("Gemini Error:", e)

                return generated_text

            if llm_provider == "cloudflare":
                response = requests.post(
                    "https://api.cloudflare.com/client/v4/accounts/11b1d730ea208e2e78adc22e1e2e8059/ai/run/@cf/meta/llama-4-scout-17b-16e-instruct",
                    headers={"Authorization": f'Bearer {os.getenv("cloudflare")}'},
                    json={
                        "messages": [
                            {
                                "role": "system",
                                "content": "You are a friendly assistant",
                            },
                            {"role": "user", "content": prompt},
                        ]
                    },
                )
                result = response.json()
                logger.info(result)
                return result["result"]["response"]

            if llm_provider == "ernie":
                response = requests.post(
                    "https://aip.baidubce.com/oauth/2.0/token",
                    params={
                        "grant_type": "client_credentials",
                        "client_id": api_key,
                        "client_secret": secret_key,
                    },
                )
                access_token = response.json().get("access_token")
                url = f"{base_url}?access_token={access_token}"

                payload = json.dumps(
                    {
                        "messages": [{"role": "user", "content": prompt}],
                        "temperature": 0.5,
                        "top_p": 0.8,
                        "penalty_score": 1,
                        "disable_search": False,
                        "enable_citation": False,
                        "response_format": "text",
                    }
                )
                headers = {"Content-Type": "application/json"}

                response = requests.request(
                    "POST", url, headers=headers, data=payload
                ).json()
                return response.get("result")

            if llm_provider == "azure":
                client = AzureOpenAI(
                    api_key=api_key,
                    api_version=api_version,
                    azure_endpoint=base_url,
                )
            else:
                client = OpenAI(
                    api_key=api_key,
                    base_url=base_url,
                )

            response = client.chat.completions.create(
                model=model_name, messages=[{"role": "user", "content": prompt}]
            )
            if response:
                if isinstance(response, ChatCompletion):
                    content = response.choices[0].message.content
                else:
                    raise Exception(
                        f'[{llm_provider}] returned an invalid response: "{response}", please check your network '
                        f"connection and try again."
                    )
            else:
                raise Exception(
                    f"[{llm_provider}] returned an empty response, please check your network connection and try again."
                )

        return content.replace("\n", "")
    except Exception as e:
        return f"Error: {str(e)}"


def generate_script(
    video_subject: str, language: str = "", paragraph_number: int = 1
) -> str:
    prompt = f"""
# Role: Video Script Generator

## Goals:
Generate a script for a video, depending on the subject of the video.

## Constraints:
1. the script is to be returned as a string with the specified number of paragraphs.
2. do not under any circumstance reference this prompt in your response.
3. get straight to the point, don't start with unnecessary things like, "welcome to this video".
4. you must not include any type of markdown or formatting in the script, never use a title.
5. only return the raw content of the script.
6. do not include "voiceover", "narrator" or similar indicators of what should be spoken at the beginning of each paragraph or line.
7. you must not mention the prompt, or anything about the script itself. also, never talk about the amount of paragraphs or lines. just write the script.
8. respond in the same language as the video subject.

# Initialization:
- video subject: {video_subject}
- number of paragraphs: {paragraph_number}
""".strip()
    if language:
        prompt += f"\n- language: {language}"

    final_script = ""
    logger.info(f"subject: {video_subject}")

    def format_response(response):
        # Clean the script
        # Remove asterisks, hashes
        response = response.replace("*", "")
        response = response.replace("#", "")

        # Remove markdown syntax
        response = re.sub(r"\[.*\]", "", response)
        response = re.sub(r"\(.*\)", "", response)

        # Split the script into paragraphs
        paragraphs = response.split("\n\n")

        # Select the specified number of paragraphs
        # selected_paragraphs = paragraphs[:paragraph_number]

        # Join the selected paragraphs into a single string
        return "\n\n".join(paragraphs)

    for i in range(_max_retries):
        try:
            response = _generate_response(prompt=prompt)
            if response:
                final_script = format_response(response)
            else:
                logging.error("gpt returned an empty response")

            # g4f may return an error message
            if final_script and "当日额度已消耗完" in final_script:
                raise ValueError(final_script)

            if final_script:
                break
        except Exception as e:
            logger.error(f"failed to generate script: {e}")

        if i < _max_retries:
            logger.warning(f"failed to generate video script, trying again... {i + 1}")

    if "Error: " in final_script:
        logger.error(f"failed to generate video script: {final_script}")
    else:
        logger.success(f"completed: \n{final_script}")
    return final_script.strip()


def generate_terms(video_subject: str, video_script: str, amount: int = 5) -> List[str]:
    prompt = f"""
# Role: Video Search Terms Generator

## Goals:
Generate {amount} search terms for stock videos, depending on the subject of a video.

## Constraints:
1. the search terms are to be returned as a json-array of strings.
2. each search term should consist of 1-3 words, always add the main subject of the video.
3. you must only return the json-array of strings. you must not return anything else. you must not return the script.
4. the search terms must be related to the subject of the video.
5. reply with english search terms only.

## Output Example:
["search term 1", "search term 2", "search term 3","search term 4","search term 5"]

## Context:
### Video Subject
{video_subject}

### Video Script
{video_script}

Please note that you must use English for generating video search terms; Chinese is not accepted.
""".strip()

    logger.info(f"subject: {video_subject}")

    search_terms = []
    response = ""
    for i in range(_max_retries):
        try:
            response = _generate_response(prompt)
            if "Error: " in response:
                logger.error(f"failed to generate video script: {response}")
                return response
            search_terms = json.loads(response)
            if not isinstance(search_terms, list) or not all(
                isinstance(term, str) for term in search_terms
            ):
                logger.error("response is not a list of strings.")
                continue

        except Exception as e:
            logger.warning(f"failed to generate video terms: {str(e)}")
            if response:
                match = re.search(r"\[.*]", response)
                if match:
                    try:
                        search_terms = json.loads(match.group())
                    except Exception as e:
                        logger.warning(f"failed to generate video terms: {str(e)}")

        if search_terms and len(search_terms) > 0:
            break
        if i < _max_retries:
            logger.warning(f"failed to generate video terms, trying again... {i + 1}")

    logger.success(f"completed: \n{search_terms}")
    return search_terms


if __name__ == "__main__":
    video_subject = "生命的意义是什么"
    script = generate_script(
        video_subject=video_subject, language="zh-CN", paragraph_number=1
    )
    print("######################")
    print(script)
    search_terms = generate_terms(
        video_subject=video_subject, video_script=script, amount=5
    )
    print("######################")
    print(search_terms)
app/services/material.py ADDED
@@ -0,0 +1,268 @@
import os
import random
from typing import List
from urllib.parse import urlencode

import requests
from loguru import logger
from moviepy.video.io.VideoFileClip import VideoFileClip

from app.config import config
from app.models.schema import MaterialInfo, VideoAspect, VideoConcatMode
from app.utils import utils

requested_count = 0


def get_api_key(cfg_key: str):
    api_keys = config.app.get(cfg_key)
    if not api_keys:
        raise ValueError(
            f"\n\n##### {cfg_key} is not set #####\n\nPlease set it in the config.toml file: {config.config_file}\n\n"
            f"{utils.to_json(config.app)}"
        )

    # if only one key is provided, return it
    if isinstance(api_keys, str):
        return api_keys

    global requested_count
    requested_count += 1
    return api_keys[requested_count % len(api_keys)]


def search_videos_pexels(
    search_term: str,
    minimum_duration: int,
    video_aspect: VideoAspect = VideoAspect.portrait,
) -> List[MaterialInfo]:
    aspect = VideoAspect(video_aspect)
    video_orientation = aspect.name
    video_width, video_height = aspect.to_resolution()
    api_key = os.getenv("pexels")
    headers = {
        "Authorization": api_key,
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36",
    }
    # Build URL
    params = {"query": search_term, "per_page": 20, "orientation": video_orientation}
    query_url = f"https://api.pexels.com/videos/search?{urlencode(params)}"
    logger.info(f"searching videos: {query_url}, with proxies: {config.proxy}")

    try:
        r = requests.get(
            query_url,
            headers=headers,
            proxies=config.proxy,
            verify=False,
            timeout=(30, 60),
        )
        response = r.json()
        video_items = []
        if "videos" not in response:
            logger.error(f"search videos failed: {response}")
            return video_items
        videos = response["videos"]
        # loop through each video in the result
        for v in videos:
            duration = v["duration"]
            # check if video has desired minimum duration
            if duration < minimum_duration:
                continue
            video_files = v["video_files"]
            # loop through each url to determine the best quality
            for video in video_files:
                w = int(video["width"])
                h = int(video["height"])
                if w == video_width and h == video_height:
                    item = MaterialInfo()
                    item.provider = "pexels"
                    item.url = video["link"]
                    item.duration = duration
                    video_items.append(item)
                    break
        return video_items
    except Exception as e:
        logger.error(f"search videos failed: {str(e)}")

    return []


def search_videos_pixabay(
    search_term: str,
    minimum_duration: int,
    video_aspect: VideoAspect = VideoAspect.portrait,
) -> List[MaterialInfo]:
    aspect = VideoAspect(video_aspect)

    video_width, video_height = aspect.to_resolution()

    api_key = get_api_key("pixabay_api_keys")
    # Build URL
    params = {
        "q": search_term,
        "video_type": "all",  # Accepted values: "all", "film", "animation"
        "per_page": 50,
        "key": api_key,
    }
    query_url = f"https://pixabay.com/api/videos/?{urlencode(params)}"
    logger.info(f"searching videos: {query_url}, with proxies: {config.proxy}")

    try:
        r = requests.get(
            query_url, proxies=config.proxy, verify=False, timeout=(30, 60)
        )
        response = r.json()
        video_items = []
        if "hits" not in response:
            logger.error(f"search videos failed: {response}")
            return video_items
        videos = response["hits"]
        # loop through each video in the result
        for v in videos:
            duration = v["duration"]
            # check if video has desired minimum duration
            if duration < minimum_duration:
                continue
            video_files = v["videos"]
            # loop through each url to determine the best quality
            for video_type in video_files:
                video = video_files[video_type]
                w = int(video["width"])
                # h = int(video["height"])
                if w >= video_width:
                    item = MaterialInfo()
                    item.provider = "pixabay"
                    item.url = video["url"]
                    item.duration = duration
                    video_items.append(item)
                    break
        return video_items
    except Exception as e:
        logger.error(f"search videos failed: {str(e)}")

    return []


def save_video(video_url: str, save_dir: str = "") -> str:
    if not save_dir:
        save_dir = utils.storage_dir("cache_videos")

    if not os.path.exists(save_dir):
        os.makedirs(save_dir)

    url_without_query = video_url.split("?")[0]
    url_hash = utils.md5(url_without_query)
    video_id = f"vid-{url_hash}"
    video_path = f"{save_dir}/{video_id}.mp4"

    # if video already exists, return the path
    if os.path.exists(video_path) and os.path.getsize(video_path) > 0:
        logger.info(f"video already exists: {video_path}")
        return video_path

    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36"
    }

    # if video does not exist, download it
    with open(video_path, "wb") as f:
        f.write(
            requests.get(
                video_url,
                headers=headers,
                proxies=config.proxy,
                verify=False,
                timeout=(60, 240),
            ).content
        )

    if os.path.exists(video_path) and os.path.getsize(video_path) > 0:
        try:
            clip = VideoFileClip(video_path)
            duration = clip.duration
            fps = clip.fps
            clip.close()
            if duration > 0 and fps > 0:
                return video_path
        except Exception as e:
            try:
                os.remove(video_path)
            except Exception:
                pass
            logger.warning(f"invalid video file: {video_path} => {str(e)}")
    return ""


def download_videos(
    task_id: str,
    search_terms: List[str],
    source: str = "pexels",
    video_aspect: VideoAspect = VideoAspect.portrait,
    video_contact_mode: VideoConcatMode = VideoConcatMode.random,
    audio_duration: float = 0.0,
    max_clip_duration: int = 5,
) -> List[str]:
    valid_video_items = []
    valid_video_urls = []
    found_duration = 0.0
    search_videos = search_videos_pexels
    if source == "pixabay":
        search_videos = search_videos_pixabay

    for search_term in search_terms:
        video_items = search_videos(
            search_term=search_term,
            minimum_duration=max_clip_duration,
            video_aspect=video_aspect,
        )
        logger.info(f"found {len(video_items)} videos for '{search_term}'")

        for item in video_items:
            if item.url not in valid_video_urls:
                valid_video_items.append(item)
                valid_video_urls.append(item.url)
                found_duration += item.duration

    logger.info(
        f"found total videos: {len(valid_video_items)}, required duration: {audio_duration} seconds, found duration: {found_duration} seconds"
    )
    video_paths = []

    material_directory = config.app.get("material_directory", "").strip()
    if material_directory == "task":
        material_directory = utils.task_dir(task_id)
    elif material_directory and not os.path.isdir(material_directory):
        material_directory = ""

    if video_contact_mode.value == VideoConcatMode.random.value:
        random.shuffle(valid_video_items)

    total_duration = 0.0
    for item in valid_video_items:
        try:
            logger.info(f"downloading video: {item.url}")
            saved_video_path = save_video(
                video_url=item.url, save_dir=material_directory
            )
            if saved_video_path:
                logger.info(f"video saved: {saved_video_path}")
                video_paths.append(saved_video_path)
                seconds = min(max_clip_duration, item.duration)
                total_duration += seconds
                if total_duration > audio_duration:
                    logger.info(
                        f"total duration of downloaded videos: {total_duration} seconds, skip downloading more"
                    )
                    break
        except Exception as e:
            logger.error(f"failed to download video: {utils.to_json(item)} => {str(e)}")
    logger.success(f"downloaded {len(video_paths)} videos")
    return video_paths
263
+
264
+
265
+ if __name__ == "__main__":
266
+ download_videos(
267
+ "test123", ["Money Exchange Medium"], audio_duration=100, source="pixabay"
268
+ )
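A minimal usage sketch for the downloader above, assuming a Pexels API key is configured; the task id and search term are made-up examples.

from app.models.schema import VideoAspect
from app.services import material

paths = material.download_videos(
    task_id="demo-task",          # made-up id; only used to resolve the save directory
    search_terms=["city night"],  # made-up query
    source="pexels",
    video_aspect=VideoAspect.portrait,
    audio_duration=30.0,          # download until ~30 seconds of footage is collected
    max_clip_duration=5,
)
print(paths)  # local .mp4 paths under the cache or task directory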
app/services/state.py ADDED
@@ -0,0 +1,158 @@
+ import ast
+ from abc import ABC, abstractmethod
+
+ from app.config import config
+ from app.models import const
+
+
+ # Base class for state management
+ class BaseState(ABC):
+     @abstractmethod
+     def update_task(self, task_id: str, state: int, progress: int = 0, **kwargs):
+         pass
+
+     @abstractmethod
+     def get_task(self, task_id: str):
+         pass
+
+     @abstractmethod
+     def get_all_tasks(self, page: int, page_size: int):
+         pass
+
+
+ # In-memory state management
+ class MemoryState(BaseState):
+     def __init__(self):
+         self._tasks = {}
+
+     def get_all_tasks(self, page: int, page_size: int):
+         start = (page - 1) * page_size
+         end = start + page_size
+         tasks = list(self._tasks.values())
+         total = len(tasks)
+         return tasks[start:end], total
+
+     def update_task(
+         self,
+         task_id: str,
+         state: int = const.TASK_STATE_PROCESSING,
+         progress: int = 0,
+         **kwargs,
+     ):
+         progress = int(progress)
+         if progress > 100:
+             progress = 100
+
+         self._tasks[task_id] = {
+             "task_id": task_id,
+             "state": state,
+             "progress": progress,
+             **kwargs,
+         }
+
+     def get_task(self, task_id: str):
+         return self._tasks.get(task_id, None)
+
+     def delete_task(self, task_id: str):
+         if task_id in self._tasks:
+             del self._tasks[task_id]
+
+
+ # Redis state management
+ class RedisState(BaseState):
+     def __init__(self, host="localhost", port=6379, db=0, password=None):
+         import redis
+
+         self._redis = redis.StrictRedis(host=host, port=port, db=db, password=password)
+
+     def get_all_tasks(self, page: int, page_size: int):
+         # SCAN the keyspace and keep only the keys that fall inside the
+         # requested page; `index` counts every key seen, so it also serves
+         # as the total once the iteration finishes.
+         start = (page - 1) * page_size
+         end = start + page_size
+         tasks = []
+         index = 0
+         cursor = 0
+         while True:
+             cursor, keys = self._redis.scan(cursor, count=page_size)
+             for key in keys:
+                 if start <= index < end:
+                     task_data = self._redis.hgetall(key)
+                     task = {
+                         k.decode("utf-8"): self._convert_to_original_type(v)
+                         for k, v in task_data.items()
+                     }
+                     tasks.append(task)
+                 index += 1
+             if cursor == 0:
+                 break
+         return tasks, index
+
+     def update_task(
+         self,
+         task_id: str,
+         state: int = const.TASK_STATE_PROCESSING,
+         progress: int = 0,
+         **kwargs,
+     ):
+         progress = int(progress)
+         if progress > 100:
+             progress = 100
+
+         fields = {
+             "task_id": task_id,
+             "state": state,
+             "progress": progress,
+             **kwargs,
+         }
+
+         for field, value in fields.items():
+             self._redis.hset(task_id, field, str(value))
+
+     def get_task(self, task_id: str):
+         task_data = self._redis.hgetall(task_id)
+         if not task_data:
+             return None
+
+         task = {
+             key.decode("utf-8"): self._convert_to_original_type(value)
+             for key, value in task_data.items()
+         }
+         return task
+
+     def delete_task(self, task_id: str):
+         self._redis.delete(task_id)
+
+     @staticmethod
+     def _convert_to_original_type(value):
+         """
+         Convert the value from a byte string back to its original data type.
+         Extend this method to handle other data types as needed.
+         """
+         value_str = value.decode("utf-8")
+
+         try:
+             # try to parse the string as a Python literal (e.g. a list)
+             return ast.literal_eval(value_str)
+         except (ValueError, SyntaxError):
+             pass
+
+         if value_str.isdigit():
+             return int(value_str)
+         # Add more conversions here if needed
+         return value_str
+
+
+ # Global state
+ _enable_redis = config.app.get("enable_redis", False)
+ _redis_host = config.app.get("redis_host", "localhost")
+ _redis_port = config.app.get("redis_port", 6379)
+ _redis_db = config.app.get("redis_db", 0)
+ _redis_password = config.app.get("redis_password", None)
+
+ state = (
+     RedisState(
+         host=_redis_host, port=_redis_port, db=_redis_db, password=_redis_password
+     )
+     if _enable_redis
+     else MemoryState()
+ )
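A quick sketch of the task-state API defined above, assuming enable_redis is false so the in-memory backend is selected; the task id is a made-up example.

from app.models import const
from app.services import state as sm

sm.state.update_task("demo-task", state=const.TASK_STATE_PROCESSING, progress=42, videos=["a.mp4"])
print(sm.state.get_task("demo-task")["progress"])  # 42
# With RedisState, every value round-trips through str() on write and
# ast.literal_eval() on read, so lists and ints come back typed and
# everything else comes back as a plain string.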
app/services/subtitle.py ADDED
@@ -0,0 +1,299 @@
+ import json
+ import os.path
+ import re
+ from timeit import default_timer as timer
+
+ from faster_whisper import WhisperModel
+ from loguru import logger
+
+ from app.config import config
+ from app.utils import utils
+
+ model_size = config.whisper.get("model_size", "large-v3")
+ device = config.whisper.get("device", "cpu")
+ compute_type = config.whisper.get("compute_type", "int8")
+ model = None
+
+
+ def create(audio_file, subtitle_file: str = ""):
+     global model
+     if not model:
+         model_path = f"{utils.root_dir()}/models/whisper-{model_size}"
+         model_bin_file = f"{model_path}/model.bin"
+         if not os.path.isdir(model_path) or not os.path.isfile(model_bin_file):
+             model_path = model_size
+
+         logger.info(
+             f"loading model: {model_path}, device: {device}, compute_type: {compute_type}"
+         )
+         try:
+             model = WhisperModel(
+                 model_size_or_path=model_path, device=device, compute_type=compute_type
+             )
+         except Exception as e:
+             logger.error(
+                 f"failed to load model: {e} \n\n"
+                 f"********************************************\n"
+                 f"this may be caused by a network issue. \n"
+                 f"please download the model manually and put it in the 'models' folder. \n"
+                 f"see [README.md FAQ](https://github.com/harry0703/MoneyPrinterTurbo) for more details.\n"
+                 f"********************************************\n\n"
+             )
+             return None
+
+     logger.info(f"start, output file: {subtitle_file}")
+     if not subtitle_file:
+         subtitle_file = f"{audio_file}.srt"
+
+     segments, info = model.transcribe(
+         audio_file,
+         beam_size=5,
+         word_timestamps=True,
+         vad_filter=True,
+         vad_parameters=dict(min_silence_duration_ms=500),
+     )
+
+     logger.info(
+         f"detected language: '{info.language}', probability: {info.language_probability:.2f}"
+     )
+
+     start = timer()
+     subtitles = []
+
+     def recognized(seg_text, seg_start, seg_end):
+         seg_text = seg_text.strip()
+         if not seg_text:
+             return
+
+         msg = "[%.2fs -> %.2fs] %s" % (seg_start, seg_end, seg_text)
+         logger.debug(msg)
+
+         subtitles.append(
+             {"msg": seg_text, "start_time": seg_start, "end_time": seg_end}
+         )
+
+     for segment in segments:
+         words_idx = 0
+         words_len = len(segment.words)
+
+         seg_start = 0
+         seg_end = 0
+         seg_text = ""
+
+         if segment.words:
+             is_segmented = False
+             for word in segment.words:
+                 if not is_segmented:
+                     seg_start = word.start
+                     is_segmented = True
+
+                 seg_end = word.end
+                 seg_text += word.word
+
+                 # if the word contains punctuation, break the sentence there
+                 if utils.str_contains_punctuation(word.word):
+                     # remove the trailing punctuation character
+                     seg_text = seg_text[:-1]
+                     if not seg_text:
+                         continue
+
+                     recognized(seg_text, seg_start, seg_end)
+
+                     is_segmented = False
+                     seg_text = ""
+
+                 if words_idx == 0 and segment.start < word.start:
+                     seg_start = word.start
+                 if words_idx == (words_len - 1) and segment.end > word.end:
+                     seg_end = word.end
+                 words_idx += 1
+
+         if not seg_text:
+             continue
+
+         recognized(seg_text, seg_start, seg_end)
+
+     end = timer()
+
+     diff = end - start
+     logger.info(f"complete, elapsed: {diff:.2f} s")
+
+     idx = 1
+     lines = []
+     for subtitle in subtitles:
+         text = subtitle.get("msg")
+         if text:
+             lines.append(
+                 utils.text_to_srt(
+                     idx, text, subtitle.get("start_time"), subtitle.get("end_time")
+                 )
+             )
+             idx += 1
+
+     sub = "\n".join(lines) + "\n"
+     with open(subtitle_file, "w", encoding="utf-8") as f:
+         f.write(sub)
+     logger.info(f"subtitle file created: {subtitle_file}")
+
+
+ def file_to_subtitles(filename):
+     if not filename or not os.path.isfile(filename):
+         return []
+
+     times_texts = []
+     current_times = None
+     current_text = ""
+     index = 0
+     with open(filename, "r", encoding="utf-8") as f:
+         for line in f:
+             times = re.findall("([0-9]*:[0-9]*:[0-9]*,[0-9]*)", line)
+             if times:
+                 current_times = line
+             elif line.strip() == "" and current_times:
+                 index += 1
+                 times_texts.append((index, current_times.strip(), current_text.strip()))
+                 current_times, current_text = None, ""
+             elif current_times:
+                 current_text += line
+     return times_texts
+
+
+ def levenshtein_distance(s1, s2):
+     if len(s1) < len(s2):
+         return levenshtein_distance(s2, s1)
+
+     if len(s2) == 0:
+         return len(s1)
+
+     previous_row = range(len(s2) + 1)
+     for i, c1 in enumerate(s1):
+         current_row = [i + 1]
+         for j, c2 in enumerate(s2):
+             insertions = previous_row[j + 1] + 1
+             deletions = current_row[j] + 1
+             substitutions = previous_row[j] + (c1 != c2)
+             current_row.append(min(insertions, deletions, substitutions))
+         previous_row = current_row
+
+     return previous_row[-1]
+
+
+ def similarity(a, b):
+     distance = levenshtein_distance(a.lower(), b.lower())
+     max_length = max(len(a), len(b))
+     return 1 - (distance / max_length)
+
+
+ def correct(subtitle_file, video_script):
+     subtitle_items = file_to_subtitles(subtitle_file)
+     script_lines = utils.split_string_by_punctuations(video_script)
+
+     corrected = False
+     new_subtitle_items = []
+     script_index = 0
+     subtitle_index = 0
+
+     while script_index < len(script_lines) and subtitle_index < len(subtitle_items):
+         script_line = script_lines[script_index].strip()
+         subtitle_line = subtitle_items[subtitle_index][2].strip()
+
+         if script_line == subtitle_line:
+             new_subtitle_items.append(subtitle_items[subtitle_index])
+             script_index += 1
+             subtitle_index += 1
+         else:
+             combined_subtitle = subtitle_line
+             start_time = subtitle_items[subtitle_index][1].split(" --> ")[0]
+             end_time = subtitle_items[subtitle_index][1].split(" --> ")[1]
+             next_subtitle_index = subtitle_index + 1
+
+             while next_subtitle_index < len(subtitle_items):
+                 next_subtitle = subtitle_items[next_subtitle_index][2].strip()
+                 if similarity(
+                     script_line, combined_subtitle + " " + next_subtitle
+                 ) > similarity(script_line, combined_subtitle):
+                     combined_subtitle += " " + next_subtitle
+                     end_time = subtitle_items[next_subtitle_index][1].split(" --> ")[1]
+                     next_subtitle_index += 1
+                 else:
+                     break
+
+             if similarity(script_line, combined_subtitle) > 0.8:
+                 logger.warning(
+                     f"Merged/Corrected - Script: {script_line}, Subtitle: {combined_subtitle}"
+                 )
+                 new_subtitle_items.append(
+                     (
+                         len(new_subtitle_items) + 1,
+                         f"{start_time} --> {end_time}",
+                         script_line,
+                     )
+                 )
+             else:
+                 logger.warning(
+                     f"Mismatch - Script: {script_line}, Subtitle: {combined_subtitle}"
+                 )
+                 new_subtitle_items.append(
+                     (
+                         len(new_subtitle_items) + 1,
+                         f"{start_time} --> {end_time}",
+                         script_line,
+                     )
+                 )
+             corrected = True
+
+             script_index += 1
+             subtitle_index = next_subtitle_index
+
+     # Process the remaining lines of the script.
+     while script_index < len(script_lines):
+         logger.warning(f"Extra script line: {script_lines[script_index]}")
+         if subtitle_index < len(subtitle_items):
+             new_subtitle_items.append(
+                 (
+                     len(new_subtitle_items) + 1,
+                     subtitle_items[subtitle_index][1],
+                     script_lines[script_index],
+                 )
+             )
+             subtitle_index += 1
+         else:
+             new_subtitle_items.append(
+                 (
+                     len(new_subtitle_items) + 1,
+                     "00:00:00,000 --> 00:00:00,000",
+                     script_lines[script_index],
+                 )
+             )
+         script_index += 1
+         corrected = True
+
+     if corrected:
+         with open(subtitle_file, "w", encoding="utf-8") as fd:
+             for i, item in enumerate(new_subtitle_items):
+                 fd.write(f"{i + 1}\n{item[1]}\n{item[2]}\n\n")
+         logger.info("Subtitle corrected")
+     else:
+         logger.success("Subtitle is correct")
+
+
+ if __name__ == "__main__":
+     task_id = "c12fd1e6-4b0a-4d65-a075-c87abe35a072"
+     task_dir = utils.task_dir(task_id)
+     subtitle_file = f"{task_dir}/subtitle.srt"
+     audio_file = f"{task_dir}/audio.mp3"
+
+     subtitles = file_to_subtitles(subtitle_file)
+     print(subtitles)
+
+     script_file = f"{task_dir}/script.json"
+     with open(script_file, "r") as f:
+         script_content = f.read()
+     s = json.loads(script_content)
+     script = s.get("script")
+
+     correct(subtitle_file, script)
+
+     subtitle_file = f"{task_dir}/subtitle-test.srt"
+     create(audio_file, subtitle_file)
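For reference, the similarity() helper above is a normalized Levenshtein distance: 1.0 for identical strings (case-insensitive), lower as more edits are needed. A tiny worked example:

from app.services.subtitle import similarity

# lowercased edit distance is 1 (the trailing "!"); the longer string has 14 chars
print(similarity("Money matters", "money matters!"))  # 1 - 1/14 ≈ 0.93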
app/services/task.py ADDED
@@ -0,0 +1,339 @@
+ import math
+ import os.path
+ import re
+ from os import path
+
+ from loguru import logger
+
+ from app.config import config
+ from app.models import const
+ from app.models.schema import VideoConcatMode, VideoParams
+ from app.services import llm, material, subtitle, video, voice
+ from app.services import state as sm
+ from app.utils import utils
+
+
+ def generate_script(task_id, params):
+     logger.info("\n\n## generating video script")
+     video_script = params.video_script.strip()
+     if not video_script:
+         video_script = llm.generate_script(
+             video_subject=params.video_subject,
+             language=params.video_language,
+             paragraph_number=params.paragraph_number,
+         )
+     else:
+         logger.debug(f"video script: \n{video_script}")
+
+     if not video_script:
+         sm.state.update_task(task_id, state=const.TASK_STATE_FAILED)
+         logger.error("failed to generate video script.")
+         return None
+
+     return video_script
+
+
+ def generate_terms(task_id, params, video_script):
+     logger.info("\n\n## generating video terms")
+     video_terms = params.video_terms
+     if not video_terms:
+         video_terms = llm.generate_terms(
+             video_subject=params.video_subject, video_script=video_script, amount=5
+         )
+     else:
+         if isinstance(video_terms, str):
+             video_terms = [term.strip() for term in re.split(r"[,,]", video_terms)]
+         elif isinstance(video_terms, list):
+             video_terms = [term.strip() for term in video_terms]
+         else:
+             raise ValueError("video_terms must be a string or a list of strings.")
+
+     logger.debug(f"video terms: {utils.to_json(video_terms)}")
+
+     if not video_terms:
+         sm.state.update_task(task_id, state=const.TASK_STATE_FAILED)
+         logger.error("failed to generate video terms.")
+         return None
+
+     return video_terms
+
+
+ def save_script_data(task_id, video_script, video_terms, params):
+     script_file = path.join(utils.task_dir(task_id), "script.json")
+     script_data = {
+         "script": video_script,
+         "search_terms": video_terms,
+         "params": params,
+     }
+
+     with open(script_file, "w", encoding="utf-8") as f:
+         f.write(utils.to_json(script_data))
+
+
+ def generate_audio(task_id, params, video_script):
+     logger.info("\n\n## generating audio")
+     audio_file = path.join(utils.task_dir(task_id), "audio.mp3")
+     sub_maker = voice.tts(
+         text=video_script,
+         voice_name=voice.parse_voice_name(params.voice_name),
+         voice_rate=params.voice_rate,
+         voice_file=audio_file,
+     )
+     if sub_maker is None:
+         sm.state.update_task(task_id, state=const.TASK_STATE_FAILED)
+         logger.error(
+             """failed to generate audio:
+ 1. check if the language of the voice matches the language of the video script.
+ 2. check if the network is available. If you are in China, it is recommended to use a VPN and enable the global traffic mode.
+         """.strip()
+         )
+         return None, None, None
+
+     audio_duration = math.ceil(voice.get_audio_duration(sub_maker))
+     return audio_file, audio_duration, sub_maker
+
+
+ def generate_subtitle(task_id, params, video_script, sub_maker, audio_file):
+     if not params.subtitle_enabled:
+         return ""
+
+     subtitle_path = path.join(utils.task_dir(task_id), "subtitle.srt")
+     subtitle_provider = config.app.get("subtitle_provider", "edge").strip().lower()
+     logger.info(f"\n\n## generating subtitle, provider: {subtitle_provider}")
+
+     subtitle_fallback = False
+     if subtitle_provider == "edge":
+         voice.create_subtitle(
+             text=video_script, sub_maker=sub_maker, subtitle_file=subtitle_path
+         )
+         if not os.path.exists(subtitle_path):
+             subtitle_fallback = True
+             logger.warning("subtitle file not found, falling back to whisper")
+
+     if subtitle_provider == "whisper" or subtitle_fallback:
+         subtitle.create(audio_file=audio_file, subtitle_file=subtitle_path)
+         logger.info("\n\n## correcting subtitle")
+         subtitle.correct(subtitle_file=subtitle_path, video_script=video_script)
+
+     subtitle_lines = subtitle.file_to_subtitles(subtitle_path)
+     if not subtitle_lines:
+         logger.warning(f"subtitle file is invalid: {subtitle_path}")
+         return ""
+
+     return subtitle_path
+
+
+ def get_video_materials(task_id, params, video_terms, audio_duration):
+     if params.video_source == "local":
+         logger.info("\n\n## preprocessing local materials")
+         materials = video.preprocess_video(
+             materials=params.video_materials, clip_duration=params.video_clip_duration
+         )
+         if not materials:
+             sm.state.update_task(task_id, state=const.TASK_STATE_FAILED)
+             logger.error(
+                 "no valid materials found, please check the materials and try again."
+             )
+             return None
+         return [material_info.url for material_info in materials]
+     else:
+         logger.info(f"\n\n## downloading videos from {params.video_source}")
+         downloaded_videos = material.download_videos(
+             task_id=task_id,
+             search_terms=video_terms,
+             source=params.video_source,
+             video_aspect=params.video_aspect,
+             video_concat_mode=params.video_concat_mode,
+             audio_duration=audio_duration * params.video_count,
+             max_clip_duration=params.video_clip_duration,
+         )
+         if not downloaded_videos:
+             sm.state.update_task(task_id, state=const.TASK_STATE_FAILED)
+             logger.error(
+                 "failed to download videos, maybe the network is not available. if you are in China, please use a VPN."
+             )
+             return None
+         return downloaded_videos
+
+
+ def generate_final_videos(
+     task_id, params, downloaded_videos, audio_file, subtitle_path
+ ):
+     final_video_paths = []
+     combined_video_paths = []
+     video_concat_mode = (
+         params.video_concat_mode if params.video_count == 1 else VideoConcatMode.random
+     )
+     video_transition_mode = params.video_transition_mode
+
+     _progress = 50
+     for i in range(params.video_count):
+         index = i + 1
+         combined_video_path = path.join(
+             utils.task_dir(task_id), f"combined-{index}.mp4"
+         )
+         logger.info(f"\n\n## combining video: {index} => {combined_video_path}")
+         video.combine_videos(
+             combined_video_path=combined_video_path,
+             video_paths=downloaded_videos,
+             audio_file=audio_file,
+             video_aspect=params.video_aspect,
+             video_concat_mode=video_concat_mode,
+             video_transition_mode=video_transition_mode,
+             max_clip_duration=params.video_clip_duration,
+             threads=params.n_threads,
+         )
+
+         _progress += 50 / params.video_count / 2
+         sm.state.update_task(task_id, progress=_progress)
+
+         final_video_path = path.join(utils.task_dir(task_id), f"final-{index}.mp4")
+
+         logger.info(f"\n\n## generating video: {index} => {final_video_path}")
+         video.generate_video(
+             video_path=combined_video_path,
+             audio_path=audio_file,
+             subtitle_path=subtitle_path,
+             output_file=final_video_path,
+             params=params,
+         )
+
+         _progress += 50 / params.video_count / 2
+         sm.state.update_task(task_id, progress=_progress)
+
+         final_video_paths.append(final_video_path)
+         combined_video_paths.append(combined_video_path)
+
+     return final_video_paths, combined_video_paths
+
+
+ def start(task_id, params: VideoParams, stop_at: str = "video"):
+     logger.info(f"start task: {task_id}, stop_at: {stop_at}")
+     sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=5)
+
+     if isinstance(params.video_concat_mode, str):
+         params.video_concat_mode = VideoConcatMode(params.video_concat_mode)
+
+     # 1. Generate script
+     video_script = generate_script(task_id, params)
+     if not video_script or "Error: " in video_script:
+         sm.state.update_task(task_id, state=const.TASK_STATE_FAILED)
+         return
+
+     sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=10)
+
+     if stop_at == "script":
+         sm.state.update_task(
+             task_id, state=const.TASK_STATE_COMPLETE, progress=100, script=video_script
+         )
+         return {"script": video_script}
+
+     # 2. Generate terms
+     video_terms = ""
+     if params.video_source != "local":
+         video_terms = generate_terms(task_id, params, video_script)
+         if not video_terms:
+             sm.state.update_task(task_id, state=const.TASK_STATE_FAILED)
+             return
+
+     save_script_data(task_id, video_script, video_terms, params)
+
+     if stop_at == "terms":
+         sm.state.update_task(
+             task_id, state=const.TASK_STATE_COMPLETE, progress=100, terms=video_terms
+         )
+         return {"script": video_script, "terms": video_terms}
+
+     sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=20)
+
+     # 3. Generate audio
+     audio_file, audio_duration, sub_maker = generate_audio(
+         task_id, params, video_script
+     )
+     if not audio_file:
+         sm.state.update_task(task_id, state=const.TASK_STATE_FAILED)
+         return
+
+     sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=30)
+
+     if stop_at == "audio":
+         sm.state.update_task(
+             task_id,
+             state=const.TASK_STATE_COMPLETE,
+             progress=100,
+             audio_file=audio_file,
+         )
+         return {"audio_file": audio_file, "audio_duration": audio_duration}
+
+     # 4. Generate subtitle
+     subtitle_path = generate_subtitle(
+         task_id, params, video_script, sub_maker, audio_file
+     )
+
+     if stop_at == "subtitle":
+         sm.state.update_task(
+             task_id,
+             state=const.TASK_STATE_COMPLETE,
+             progress=100,
+             subtitle_path=subtitle_path,
+         )
+         return {"subtitle_path": subtitle_path}
+
+     sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=40)
+
+     # 5. Get video materials
+     downloaded_videos = get_video_materials(
+         task_id, params, video_terms, audio_duration
+     )
+     if not downloaded_videos:
+         sm.state.update_task(task_id, state=const.TASK_STATE_FAILED)
+         return
+
+     if stop_at == "materials":
+         sm.state.update_task(
+             task_id,
+             state=const.TASK_STATE_COMPLETE,
+             progress=100,
+             materials=downloaded_videos,
+         )
+         return {"materials": downloaded_videos}
+
+     sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=50)
+
+     # 6. Generate final videos
+     final_video_paths, combined_video_paths = generate_final_videos(
+         task_id, params, downloaded_videos, audio_file, subtitle_path
+     )
+
+     if not final_video_paths:
+         sm.state.update_task(task_id, state=const.TASK_STATE_FAILED)
+         return
+
+     logger.success(
+         f"task {task_id} finished, generated {len(final_video_paths)} videos."
+     )
+
+     kwargs = {
+         "videos": final_video_paths,
+         "combined_videos": combined_video_paths,
+         "script": video_script,
+         "terms": video_terms,
+         "audio_file": audio_file,
+         "audio_duration": audio_duration,
+         "subtitle_path": subtitle_path,
+         "materials": downloaded_videos,
+     }
+     sm.state.update_task(
+         task_id, state=const.TASK_STATE_COMPLETE, progress=100, **kwargs
+     )
+     return kwargs
+
+
+ if __name__ == "__main__":
+     task_id = "task_id"
+     params = VideoParams(
+         video_subject="金钱的作用",
+         voice_name="zh-CN-XiaoyiNeural-Female",
+         voice_rate=1.0,
+     )
+     start(task_id, params, stop_at="video")
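A sketch of driving the pipeline step by step via stop_at, assuming the VideoParams defaults are sufficient for the remaining fields; the task id and subject are made-up examples.

from app.models.schema import VideoParams
from app.services import task

params = VideoParams(video_subject="the role of money")
result = task.start("demo-task", params, stop_at="script")
if result:
    print(result["script"])  # the pipeline stopped right after the LLM script step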
app/services/utils/video_effects.py ADDED
@@ -0,0 +1,21 @@
+ from moviepy import Clip, vfx
+
+
+ # FadeIn
+ def fadein_transition(clip: Clip, t: float) -> Clip:
+     return clip.with_effects([vfx.FadeIn(t)])
+
+
+ # FadeOut
+ def fadeout_transition(clip: Clip, t: float) -> Clip:
+     return clip.with_effects([vfx.FadeOut(t)])
+
+
+ # SlideIn
+ def slidein_transition(clip: Clip, t: float, side: str) -> Clip:
+     return clip.with_effects([vfx.SlideIn(t, side)])
+
+
+ # SlideOut
+ def slideout_transition(clip: Clip, t: float, side: str) -> Clip:
+     return clip.with_effects([vfx.SlideOut(t, side)])
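A minimal sketch applying these transitions to a clip with the moviepy v2 API imported above; "input.mp4" and "output.mp4" are placeholder paths.

from moviepy import VideoFileClip

from app.services.utils import video_effects

clip = VideoFileClip("input.mp4")
clip = video_effects.fadein_transition(clip, 1.0)            # 1-second fade in
clip = video_effects.slideout_transition(clip, 1.0, "left")  # slide out to the left
clip.write_videofile("output.mp4", fps=30, logger=None)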
app/services/video.py ADDED
@@ -0,0 +1,531 @@
+ import glob
+ import itertools
+ import os
+ import random
+ import gc
+ import shutil
+ from typing import List
+ from loguru import logger
+ from moviepy import (
+     AudioFileClip,
+     ColorClip,
+     CompositeAudioClip,
+     CompositeVideoClip,
+     ImageClip,
+     TextClip,
+     VideoFileClip,
+     afx,
+     concatenate_videoclips,
+ )
+ from moviepy.video.tools.subtitles import SubtitlesClip
+ from PIL import ImageFont
+
+ from app.models import const
+ from app.models.schema import (
+     MaterialInfo,
+     VideoAspect,
+     VideoConcatMode,
+     VideoParams,
+     VideoTransitionMode,
+ )
+ from app.services.utils import video_effects
+ from app.utils import utils
+
+
+ class SubClippedVideoClip:
+     def __init__(self, file_path, start_time=None, end_time=None, width=None, height=None, duration=None):
+         self.file_path = file_path
+         self.start_time = start_time
+         self.end_time = end_time
+         self.width = width
+         self.height = height
+         if duration is None:
+             self.duration = end_time - start_time
+         else:
+             self.duration = duration
+
+     def __str__(self):
+         return f"SubClippedVideoClip(file_path={self.file_path}, start_time={self.start_time}, end_time={self.end_time}, duration={self.duration}, width={self.width}, height={self.height})"
+
+
+ audio_codec = "aac"
+ video_codec = "libx264"
+ fps = 30
+
+
+ def close_clip(clip):
+     if clip is None:
+         return
+
+     try:
+         # close main resources
+         if hasattr(clip, 'reader') and clip.reader is not None:
+             clip.reader.close()
+
+         # close audio resources
+         if hasattr(clip, 'audio') and clip.audio is not None:
+             if hasattr(clip.audio, 'reader') and clip.audio.reader is not None:
+                 clip.audio.reader.close()
+             del clip.audio
+
+         # close mask resources
+         if hasattr(clip, 'mask') and clip.mask is not None:
+             if hasattr(clip.mask, 'reader') and clip.mask.reader is not None:
+                 clip.mask.reader.close()
+             del clip.mask
+
+         # handle child clips in composite clips
+         if hasattr(clip, 'clips') and clip.clips:
+             for child_clip in clip.clips:
+                 if child_clip is not clip:  # avoid possible circular references
+                     close_clip(child_clip)
+
+         # clear clip list
+         if hasattr(clip, 'clips'):
+             clip.clips = []
+
+     except Exception as e:
+         logger.error(f"failed to close clip: {str(e)}")
+
+     del clip
+     gc.collect()
+
+
+ def delete_files(files: List[str] | str):
+     if isinstance(files, str):
+         files = [files]
+
+     for file in files:
+         try:
+             os.remove(file)
+         except OSError:
+             pass
+
+
+ def get_bgm_file(bgm_type: str = "random", bgm_file: str = ""):
+     if not bgm_type:
+         return ""
+
+     if bgm_file and os.path.exists(bgm_file):
+         return bgm_file
+
+     if bgm_type == "random":
+         suffix = "*.mp3"
+         song_dir = utils.song_dir()
+         files = glob.glob(os.path.join(song_dir, suffix))
+         return random.choice(files)
+
+     return ""
+
+
+ def combine_videos(
+     combined_video_path: str,
+     video_paths: List[str],
+     audio_file: str,
+     video_aspect: VideoAspect = VideoAspect.portrait,
+     video_concat_mode: VideoConcatMode = VideoConcatMode.random,
+     video_transition_mode: VideoTransitionMode = None,
+     max_clip_duration: int = 5,
+     threads: int = 2,
+ ) -> str:
+     audio_clip = AudioFileClip(audio_file)
+     audio_duration = audio_clip.duration
+     close_clip(audio_clip)  # opened only to read the duration
+     logger.info(f"audio duration: {audio_duration} seconds")
+     # each clip is capped at max_clip_duration seconds
+     req_dur = max_clip_duration
+     logger.info(f"maximum clip duration: {req_dur} seconds")
+     output_dir = os.path.dirname(combined_video_path)
+
+     aspect = VideoAspect(video_aspect)
+     video_width, video_height = aspect.to_resolution()
+
+     processed_clips = []
+     subclipped_items = []
+     video_duration = 0
+     for video_path in video_paths:
+         clip = VideoFileClip(video_path)
+         clip_duration = clip.duration
+         clip_w, clip_h = clip.size
+         close_clip(clip)
+
+         start_time = 0
+
+         while start_time < clip_duration:
+             end_time = min(start_time + max_clip_duration, clip_duration)
+             if clip_duration - start_time >= max_clip_duration:
+                 subclipped_items.append(SubClippedVideoClip(file_path=video_path, start_time=start_time, end_time=end_time, width=clip_w, height=clip_h))
+             start_time = end_time
+             if video_concat_mode.value == VideoConcatMode.sequential.value:
+                 break
+
+     # randomize the order of the subclipped items
+     if video_concat_mode.value == VideoConcatMode.random.value:
+         random.shuffle(subclipped_items)
+
+     logger.debug(f"total subclipped items: {len(subclipped_items)}")
+
+     # add downloaded clips over and over until the duration of the audio has been reached
+     for i, subclipped_item in enumerate(subclipped_items):
+         if video_duration > audio_duration:
+             break
+
+         logger.debug(f"processing clip {i+1}: {subclipped_item.width}x{subclipped_item.height}, current duration: {video_duration:.2f}s, remaining: {audio_duration - video_duration:.2f}s")
+
+         try:
+             clip = VideoFileClip(subclipped_item.file_path).subclipped(subclipped_item.start_time, subclipped_item.end_time)
+             clip_duration = clip.duration
+             # not all videos are the same size, so resize when needed
+             clip_w, clip_h = clip.size
+             if clip_w != video_width or clip_h != video_height:
+                 clip_ratio = clip.w / clip.h
+                 video_ratio = video_width / video_height
+                 logger.debug(f"resizing clip, source: {clip_w}x{clip_h}, ratio: {clip_ratio:.2f}, target: {video_width}x{video_height}, ratio: {video_ratio:.2f}")
+
+                 if clip_ratio == video_ratio:
+                     clip = clip.resized(new_size=(video_width, video_height))
+                 else:
+                     # scale by the limiting dimension, then center on a black background
+                     if clip_ratio > video_ratio:
+                         scale_factor = video_width / clip_w
+                     else:
+                         scale_factor = video_height / clip_h
+
+                     new_width = int(clip_w * scale_factor)
+                     new_height = int(clip_h * scale_factor)
+
+                     background = ColorClip(size=(video_width, video_height), color=(0, 0, 0)).with_duration(clip_duration)
+                     clip_resized = clip.resized(new_size=(new_width, new_height)).with_position("center")
+                     clip = CompositeVideoClip([background, clip_resized])
+
+             shuffle_side = random.choice(["left", "right", "top", "bottom"])
+             if video_transition_mode is None or video_transition_mode.value == VideoTransitionMode.none.value:
+                 pass
+             elif video_transition_mode.value == VideoTransitionMode.fade_in.value:
+                 clip = video_effects.fadein_transition(clip, 1)
+             elif video_transition_mode.value == VideoTransitionMode.fade_out.value:
+                 clip = video_effects.fadeout_transition(clip, 1)
+             elif video_transition_mode.value == VideoTransitionMode.slide_in.value:
+                 clip = video_effects.slidein_transition(clip, 1, shuffle_side)
+             elif video_transition_mode.value == VideoTransitionMode.slide_out.value:
+                 clip = video_effects.slideout_transition(clip, 1, shuffle_side)
+             elif video_transition_mode.value == VideoTransitionMode.shuffle.value:
+                 transition_funcs = [
+                     lambda c: video_effects.fadein_transition(c, 1),
+                     lambda c: video_effects.fadeout_transition(c, 1),
+                     lambda c: video_effects.slidein_transition(c, 1, shuffle_side),
+                     lambda c: video_effects.slideout_transition(c, 1, shuffle_side),
+                 ]
+                 shuffle_transition = random.choice(transition_funcs)
+                 clip = shuffle_transition(clip)
+
+             if clip.duration > max_clip_duration:
+                 clip = clip.subclipped(0, max_clip_duration)
+
+             # write the processed clip to a temp file
+             clip_file = f"{output_dir}/temp-clip-{i+1}.mp4"
+             clip.write_videofile(clip_file, logger=None, fps=fps, codec=video_codec)
+
+             processed_clips.append(SubClippedVideoClip(file_path=clip_file, duration=clip.duration, width=clip_w, height=clip_h))
+             video_duration += clip.duration
+             close_clip(clip)
+
+         except Exception as e:
+             logger.error(f"failed to process clip: {str(e)}")
+
+     # loop processed clips until the video duration matches or exceeds the audio duration
+     if video_duration < audio_duration:
+         logger.warning(f"video duration ({video_duration:.2f}s) is shorter than audio duration ({audio_duration:.2f}s), looping clips to match audio length.")
+         base_clips = processed_clips.copy()
+         for clip in itertools.cycle(base_clips):
+             if video_duration >= audio_duration:
+                 break
+             processed_clips.append(clip)
+             video_duration += clip.duration
+         logger.info(f"video duration: {video_duration:.2f}s, audio duration: {audio_duration:.2f}s, looped {len(processed_clips)-len(base_clips)} clips")
+
+     # merge video clips progressively, avoiding loading all videos at once to prevent memory overflow
+     logger.info("starting clip merging process")
+     if not processed_clips:
+         logger.warning("no clips available for merging")
+         return combined_video_path
+
+     # if there is only one clip, use it directly
+     if len(processed_clips) == 1:
+         logger.info("using single clip directly")
+         shutil.copy(processed_clips[0].file_path, combined_video_path)
+         delete_files([clip.file_path for clip in processed_clips])
+         logger.info("video combining completed")
+         return combined_video_path
+
+     # create the initial video file as the base
+     base_clip_path = processed_clips[0].file_path
+     temp_merged_video = f"{output_dir}/temp-merged-video.mp4"
+     temp_merged_next = f"{output_dir}/temp-merged-next.mp4"
+
+     # copy the first clip as the initial merged video
+     shutil.copy(base_clip_path, temp_merged_video)
+
+     # merge the remaining video clips one by one
+     for i, clip in enumerate(processed_clips[1:], 1):
+         logger.info(f"merging clip {i}/{len(processed_clips)-1}, duration: {clip.duration:.2f}s")
+
+         try:
+             # load the current base video and the next clip to merge
+             base_clip = VideoFileClip(temp_merged_video)
+             next_clip = VideoFileClip(clip.file_path)
+
+             # merge these two clips
+             merged_clip = concatenate_videoclips([base_clip, next_clip])
+
+             # save the merged result to a temp file
+             merged_clip.write_videofile(
+                 filename=temp_merged_next,
+                 threads=threads,
+                 logger=None,
+                 temp_audiofile_path=output_dir,
+                 audio_codec=audio_codec,
+                 fps=fps,
+             )
+             close_clip(base_clip)
+             close_clip(next_clip)
+             close_clip(merged_clip)
+
+             # replace the base file with the new merged file
+             delete_files(temp_merged_video)
+             os.rename(temp_merged_next, temp_merged_video)
+
+         except Exception as e:
+             logger.error(f"failed to merge clip: {str(e)}")
+             continue
+
+     # after merging, rename the final result to the target file name
+     os.rename(temp_merged_video, combined_video_path)
+
+     # clean up temp files
+     clip_files = [clip.file_path for clip in processed_clips]
+     delete_files(clip_files)
+
+     logger.info("video combining completed")
+     return combined_video_path
+
+
+ def wrap_text(text, max_width, font="Arial", fontsize=60):
+     # Create the ImageFont used for measuring rendered text width
+     font = ImageFont.truetype(font, fontsize)
+
+     def get_text_size(inner_text):
+         inner_text = inner_text.strip()
+         left, top, right, bottom = font.getbbox(inner_text)
+         return right - left, bottom - top
+
+     width, height = get_text_size(text)
+     if width <= max_width:
+         return text, height
+
+     processed = True
+
+     _wrapped_lines_ = []
+     words = text.split(" ")
+     _txt_ = ""
+     for word in words:
+         _before = _txt_
+         _txt_ += f"{word} "
+         _width, _height = get_text_size(_txt_)
+         if _width <= max_width:
+             continue
+         else:
+             if _txt_.strip() == word.strip():
+                 processed = False
+                 break
+             _wrapped_lines_.append(_before)
+             _txt_ = f"{word} "
+     _wrapped_lines_.append(_txt_)
+     if processed:
+         _wrapped_lines_ = [line.strip() for line in _wrapped_lines_]
+         result = "\n".join(_wrapped_lines_).strip()
+         height = len(_wrapped_lines_) * height
+         return result, height
+
+     # fall back to per-character wrapping (e.g. for scripts without spaces)
+     _wrapped_lines_ = []
+     chars = list(text)
+     _txt_ = ""
+     for word in chars:
+         _txt_ += word
+         _width, _height = get_text_size(_txt_)
+         if _width <= max_width:
+             continue
+         else:
+             _wrapped_lines_.append(_txt_)
+             _txt_ = ""
+     _wrapped_lines_.append(_txt_)
+     result = "\n".join(_wrapped_lines_).strip()
+     height = len(_wrapped_lines_) * height
+     return result, height
+
+
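A small sketch of calling wrap_text: it greedily packs words until the rendered width (measured with the real font) would exceed max_width, then falls back to per-character wrapping for text without spaces. "font.ttf" is a placeholder; any TrueType font file will do.

wrapped, line_height = wrap_text(
    "a fairly long subtitle line that will not fit on one row",
    max_width=400,
    font="font.ttf",  # placeholder path to a .ttf/.ttc file
    fontsize=60,
)
print(wrapped)  # the same text with newlines inserted at word boundaries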
+ def generate_video(
+     video_path: str,
+     audio_path: str,
+     subtitle_path: str,
+     output_file: str,
+     params: VideoParams,
+ ):
+     aspect = VideoAspect(params.video_aspect)
+     video_width, video_height = aspect.to_resolution()
+
+     logger.info(f"generating video: {video_width} x {video_height}")
+     logger.info(f" ① video: {video_path}")
+     logger.info(f" ② audio: {audio_path}")
+     logger.info(f" ③ subtitle: {subtitle_path}")
+     logger.info(f" ④ output: {output_file}")
+
+     # https://github.com/harry0703/MoneyPrinterTurbo/issues/217
+     # PermissionError: [WinError 32] The process cannot access the file because it is being used by another process: 'final-1.mp4.tempTEMP_MPY_wvf_snd.mp3'
+     # write the temp audio file into the same directory as the output file
+     output_dir = os.path.dirname(output_file)
+
+     font_path = ""
+     if params.subtitle_enabled:
+         if not params.font_name:
+             params.font_name = "STHeitiMedium.ttc"
+         font_path = os.path.join(utils.font_dir(), params.font_name)
+         if os.name == "nt":
+             font_path = font_path.replace("\\", "/")
+
+         logger.info(f" ⑤ font: {font_path}")
+
+     def create_text_clip(subtitle_item):
+         params.font_size = int(params.font_size)
+         params.stroke_width = int(params.stroke_width)
+         phrase = subtitle_item[1]
+         max_width = video_width * 0.9
+         wrapped_txt, txt_height = wrap_text(
+             phrase, max_width=max_width, font=font_path, fontsize=params.font_size
+         )
+         interline = int(params.font_size * 0.25)
+         size = (
+             int(max_width),
+             int(txt_height + params.font_size * 0.25 + (interline * (wrapped_txt.count("\n") + 1))),
+         )
+
+         _clip = TextClip(
+             text=wrapped_txt,
+             font=font_path,
+             font_size=params.font_size,
+             color=params.text_fore_color,
+             bg_color=params.text_background_color,
+             stroke_color=params.stroke_color,
+             stroke_width=params.stroke_width,
+             # interline=interline,
+             # size=size,
+         )
+         duration = subtitle_item[0][1] - subtitle_item[0][0]
+         _clip = _clip.with_start(subtitle_item[0][0])
+         _clip = _clip.with_end(subtitle_item[0][1])
+         _clip = _clip.with_duration(duration)
+         if params.subtitle_position == "bottom":
+             _clip = _clip.with_position(("center", video_height * 0.95 - _clip.h))
+         elif params.subtitle_position == "top":
+             _clip = _clip.with_position(("center", video_height * 0.05))
+         elif params.subtitle_position == "custom":
+             # Ensure the subtitle is fully within the screen bounds
+             margin = 10  # Additional margin, in pixels
+             max_y = video_height - _clip.h - margin
+             min_y = margin
+             custom_y = (video_height - _clip.h) * (params.custom_position / 100)
+             custom_y = max(
+                 min_y, min(custom_y, max_y)
+             )  # Constrain the y value within the valid range
+             _clip = _clip.with_position(("center", custom_y))
+         else:  # center
+             _clip = _clip.with_position(("center", "center"))
+         return _clip
+
+     video_clip = VideoFileClip(video_path).without_audio()
+     audio_clip = AudioFileClip(audio_path).with_effects(
+         [afx.MultiplyVolume(params.voice_volume)]
+     )
+
+     def make_textclip(text):
+         return TextClip(
+             text=text,
+             font=font_path,
+             font_size=params.font_size,
+         )
+
+     if subtitle_path and os.path.exists(subtitle_path):
+         sub = SubtitlesClip(
+             subtitles=subtitle_path, encoding="utf-8", make_textclip=make_textclip
+         )
+         text_clips = []
+         for item in sub.subtitles:
+             clip = create_text_clip(subtitle_item=item)
+             text_clips.append(clip)
+         video_clip = CompositeVideoClip([video_clip, *text_clips])
+
+     bgm_file = get_bgm_file(bgm_type=params.bgm_type, bgm_file=params.bgm_file)
+     if bgm_file:
+         try:
+             bgm_clip = AudioFileClip(bgm_file).with_effects(
+                 [
+                     afx.MultiplyVolume(params.bgm_volume),
+                     afx.AudioFadeOut(3),
+                     afx.AudioLoop(duration=video_clip.duration),
+                 ]
+             )
+             audio_clip = CompositeAudioClip([audio_clip, bgm_clip])
+         except Exception as e:
+             logger.error(f"failed to add bgm: {str(e)}")
+
+     video_clip = video_clip.with_audio(audio_clip)
+     video_clip.write_videofile(
+         output_file,
+         audio_codec=audio_codec,
+         temp_audiofile_path=output_dir,
+         threads=params.n_threads or 2,
+         logger=None,
+         fps=fps,
+     )
+     video_clip.close()
+     del video_clip
+
+
+ def preprocess_video(materials: List[MaterialInfo], clip_duration=4):
+     for material in materials:
+         if not material.url:
+             continue
+
+         ext = utils.parse_extension(material.url)
+         try:
+             clip = VideoFileClip(material.url)
+         except Exception:
+             clip = ImageClip(material.url)
+
+         width = clip.size[0]
+         height = clip.size[1]
+         if width < 480 or height < 480:
+             logger.warning(f"low resolution material: {width}x{height}, minimum 480x480 required")
+             close_clip(clip)
+             continue
+
+         if ext in const.FILE_TYPE_IMAGES:
+             logger.info(f"processing image: {material.url}")
+             # Create an image clip and set its duration to the target clip duration
+             clip = (
+                 ImageClip(material.url)
+                 .with_duration(clip_duration)
+                 .with_position("center")
+             )
+             # Apply a zoom effect using the resized method.
+             # A lambda makes the zoom dynamic over time: the clip starts at its
+             # original size and gradually scales up as t approaches clip.duration.
+             # Note: 1 represents 100% size, so 1.2 would represent 120% size.
+             zoom_clip = clip.resized(
+                 lambda t: 1 + (clip_duration * 0.03) * (t / clip.duration)
+             )
+
+             # Optionally, wrap the zoomed clip in a composite video clip.
+             # This is useful when other elements need to be added to the video.
+             final_clip = CompositeVideoClip([zoom_clip])
+
+             # Write the video to a file next to the source image.
+             video_file = f"{material.url}.mp4"
+             final_clip.write_videofile(video_file, fps=30, logger=None)
+             close_clip(clip)
+             material.url = video_file
+             logger.success(f"image processed: {video_file}")
+     return materials
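The resize branch in combine_videos scales by the limiting dimension and centers the clip on a black background of the target size. Worked through with made-up numbers, a 1280x720 landscape source inside a 1080x1920 portrait frame:

clip_w, clip_h = 1280, 720
video_width, video_height = 1080, 1920
if clip_w / clip_h > video_width / video_height:
    scale_factor = video_width / clip_w   # width-limited: 1080 / 1280 = 0.84375
else:
    scale_factor = video_height / clip_h
new_size = (int(clip_w * scale_factor), int(clip_h * scale_factor))
print(new_size)  # (1080, 607): letterboxed top and bottom inside 1080x1920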
app/services/voice.py ADDED
@@ -0,0 +1,1566 @@
+ import asyncio
+ import os
+ import re
+ from datetime import datetime
+ from typing import Union
+ from xml.sax.saxutils import unescape
+
+ import edge_tts
+ import requests
+ from edge_tts import SubMaker, submaker
+ from edge_tts.submaker import mktimestamp
+ from loguru import logger
+ from moviepy.video.tools import subtitles
+
+ from app.config import config
+ from app.utils import utils
+
+
+ def get_siliconflow_voices() -> list[str]:
+     """
+     Get the list of SiliconFlow voices.
+
+     Returns:
+         A list of voice names, e.g.
+         ["siliconflow:FunAudioLLM/CosyVoice2-0.5B:alex", ...]
+     """
+     # SiliconFlow voices and their gender (the gender is only used for display)
+     voices_with_gender = [
+         ("FunAudioLLM/CosyVoice2-0.5B", "alex", "Male"),
+         ("FunAudioLLM/CosyVoice2-0.5B", "anna", "Female"),
+         ("FunAudioLLM/CosyVoice2-0.5B", "bella", "Female"),
+         ("FunAudioLLM/CosyVoice2-0.5B", "benjamin", "Male"),
+         ("FunAudioLLM/CosyVoice2-0.5B", "charles", "Male"),
+         ("FunAudioLLM/CosyVoice2-0.5B", "claire", "Female"),
+         ("FunAudioLLM/CosyVoice2-0.5B", "david", "Male"),
+         ("FunAudioLLM/CosyVoice2-0.5B", "diana", "Female"),
+     ]
+
+     # Prepend the "siliconflow:" prefix and format the display name
+     return [
+         f"siliconflow:{model}:{voice}-{gender}"
+         for model, voice, gender in voices_with_gender
+     ]
+
+
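The entries returned above pack provider, model, and voice into one display string. A sketch of splitting such a name back apart; the format is inferred from the code above, not from a published spec.

name = "siliconflow:FunAudioLLM/CosyVoice2-0.5B:alex-Male"
provider, model, voice_and_gender = name.split(":", 2)
voice = voice_and_gender.rsplit("-", 1)[0]
print(provider, model, voice)  # siliconflow FunAudioLLM/CosyVoice2-0.5B alex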
+ def get_all_azure_voices(filter_locals=None) -> list[str]:
+     azure_voices_str = """
+     Name: af-ZA-AdriNeural
+     Gender: Female
+
+     Name: af-ZA-WillemNeural
+     Gender: Male
+
+     Name: am-ET-AmehaNeural
+     Gender: Male
+
+     Name: am-ET-MekdesNeural
+     Gender: Female
+
+     Name: ar-AE-FatimaNeural
+     Gender: Female
+
+     Name: ar-AE-HamdanNeural
+     Gender: Male
+
+     Name: ar-BH-AliNeural
+     Gender: Male
+
+     Name: ar-BH-LailaNeural
+     Gender: Female
+
+     Name: ar-DZ-AminaNeural
+     Gender: Female
+
+     Name: ar-DZ-IsmaelNeural
+     Gender: Male
+
+     Name: ar-EG-SalmaNeural
+     Gender: Female
+
+     Name: ar-EG-ShakirNeural
+     Gender: Male
+
+     Name: ar-IQ-BasselNeural
+     Gender: Male
+
+     Name: ar-IQ-RanaNeural
+     Gender: Female
+
+     Name: ar-JO-SanaNeural
+     Gender: Female
+
+     Name: ar-JO-TaimNeural
+     Gender: Male
+
+     Name: ar-KW-FahedNeural
+     Gender: Male
+
+     Name: ar-KW-NouraNeural
+     Gender: Female
+
+     Name: ar-LB-LaylaNeural
+     Gender: Female
+
+     Name: ar-LB-RamiNeural
+     Gender: Male
+
+     Name: ar-LY-ImanNeural
+     Gender: Female
+
+     Name: ar-LY-OmarNeural
+     Gender: Male
+
+     Name: ar-MA-JamalNeural
+     Gender: Male
+
+     Name: ar-MA-MounaNeural
+     Gender: Female
+
+     Name: ar-OM-AbdullahNeural
+     Gender: Male
+
+     Name: ar-OM-AyshaNeural
+     Gender: Female
+
+     Name: ar-QA-AmalNeural
+     Gender: Female
+
+     Name: ar-QA-MoazNeural
+     Gender: Male
+
+     Name: ar-SA-HamedNeural
+     Gender: Male
+
+     Name: ar-SA-ZariyahNeural
+     Gender: Female
+
+     Name: ar-SY-AmanyNeural
+     Gender: Female
+
+     Name: ar-SY-LaithNeural
+     Gender: Male
+
+     Name: ar-TN-HediNeural
+     Gender: Male
+
+     Name: ar-TN-ReemNeural
+     Gender: Female
+
+     Name: ar-YE-MaryamNeural
+     Gender: Female
+
+     Name: ar-YE-SalehNeural
+     Gender: Male
+
+     Name: az-AZ-BabekNeural
+     Gender: Male
+
+     Name: az-AZ-BanuNeural
+     Gender: Female
+
+     Name: bg-BG-BorislavNeural
+     Gender: Male
+
+     Name: bg-BG-KalinaNeural
+     Gender: Female
+
+     Name: bn-BD-NabanitaNeural
+     Gender: Female
+
+     Name: bn-BD-PradeepNeural
+     Gender: Male
+
+     Name: bn-IN-BashkarNeural
+     Gender: Male
+
+     Name: bn-IN-TanishaaNeural
+     Gender: Female
+
+     Name: bs-BA-GoranNeural
+     Gender: Male
+
+     Name: bs-BA-VesnaNeural
+     Gender: Female
+
+     Name: ca-ES-EnricNeural
+     Gender: Male
+
+     Name: ca-ES-JoanaNeural
+     Gender: Female
+
+     Name: cs-CZ-AntoninNeural
+     Gender: Male
+
+     Name: cs-CZ-VlastaNeural
+     Gender: Female
+
+     Name: cy-GB-AledNeural
+     Gender: Male
+
+     Name: cy-GB-NiaNeural
+     Gender: Female
+
+     Name: da-DK-ChristelNeural
+     Gender: Female
+
+     Name: da-DK-JeppeNeural
+     Gender: Male
+
+     Name: de-AT-IngridNeural
+     Gender: Female
+
+     Name: de-AT-JonasNeural
+     Gender: Male
+
+     Name: de-CH-JanNeural
+     Gender: Male
+
+     Name: de-CH-LeniNeural
+     Gender: Female
+
+     Name: de-DE-AmalaNeural
+     Gender: Female
+
+     Name: de-DE-ConradNeural
+     Gender: Male
+
+     Name: de-DE-FlorianMultilingualNeural
+     Gender: Male
+
+     Name: de-DE-KatjaNeural
+     Gender: Female
+
+     Name: de-DE-KillianNeural
+     Gender: Male
+
+     Name: de-DE-SeraphinaMultilingualNeural
+     Gender: Female
+
+     Name: el-GR-AthinaNeural
+     Gender: Female
+
+     Name: el-GR-NestorasNeural
+     Gender: Male
+
+     Name: en-AU-NatashaNeural
+     Gender: Female
+
+     Name: en-AU-WilliamNeural
+     Gender: Male
+
+     Name: en-CA-ClaraNeural
+     Gender: Female
+
+     Name: en-CA-LiamNeural
+     Gender: Male
+
+     Name: en-GB-LibbyNeural
+     Gender: Female
+
+     Name: en-GB-MaisieNeural
+     Gender: Female
+
+     Name: en-GB-RyanNeural
+     Gender: Male
+
+     Name: en-GB-SoniaNeural
+     Gender: Female
+
+     Name: en-GB-ThomasNeural
+     Gender: Male
+
+     Name: en-HK-SamNeural
+     Gender: Male
+
+     Name: en-HK-YanNeural
+     Gender: Female
+
+     Name: en-IE-ConnorNeural
+     Gender: Male
+
+     Name: en-IE-EmilyNeural
+     Gender: Female
+
+     Name: en-IN-NeerjaExpressiveNeural
+     Gender: Female
+
+     Name: en-IN-NeerjaNeural
+     Gender: Female
+
+     Name: en-IN-PrabhatNeural
+     Gender: Male
+
+     Name: en-KE-AsiliaNeural
+     Gender: Female
+
+     Name: en-KE-ChilembaNeural
+     Gender: Male
+
+     Name: en-NG-AbeoNeural
+     Gender: Male
+
+     Name: en-NG-EzinneNeural
+     Gender: Female
+
+     Name: en-NZ-MitchellNeural
+     Gender: Male
+
+     Name: en-NZ-MollyNeural
+     Gender: Female
+
+     Name: en-PH-JamesNeural
+     Gender: Male
+
+     Name: en-PH-RosaNeural
+     Gender: Female
+
+     Name: en-SG-LunaNeural
+     Gender: Female
+
+     Name: en-SG-WayneNeural
+     Gender: Male
+
+     Name: en-TZ-ElimuNeural
+     Gender: Male
+
+     Name: en-TZ-ImaniNeural
+     Gender: Female
+
+     Name: en-US-AnaNeural
+     Gender: Female
+
+     Name: en-US-AndrewMultilingualNeural
+     Gender: Male
+
+     Name: en-US-AndrewNeural
+     Gender: Male
+
+     Name: en-US-AriaNeural
+     Gender: Female
+
+     Name: en-US-AvaMultilingualNeural
+     Gender: Female
+
+     Name: en-US-AvaNeural
+     Gender: Female
+
+     Name: en-US-BrianMultilingualNeural
+     Gender: Male
+
+     Name: en-US-BrianNeural
+     Gender: Male
+
+     Name: en-US-ChristopherNeural
+     Gender: Male
+
+     Name: en-US-EmmaMultilingualNeural
+     Gender: Female
+
+     Name: en-US-EmmaNeural
+     Gender: Female
+
+     Name: en-US-EricNeural
+     Gender: Male
+
+     Name: en-US-GuyNeural
+     Gender: Male
+
+     Name: en-US-JennyNeural
+     Gender: Female
+
+     Name: en-US-MichelleNeural
+     Gender: Female
+
+     Name: en-US-RogerNeural
+     Gender: Male
+
+     Name: en-US-SteffanNeural
+     Gender: Male
+
+     Name: en-ZA-LeahNeural
+     Gender: Female
+
+     Name: en-ZA-LukeNeural
+     Gender: Male
+
+     Name: es-AR-ElenaNeural
+     Gender: Female
+
+     Name: es-AR-TomasNeural
+     Gender: Male
+
+     Name: es-BO-MarceloNeural
+     Gender: Male
+
+     Name: es-BO-SofiaNeural
+     Gender: Female
+
+     Name: es-CL-CatalinaNeural
+     Gender: Female
+
+     Name: es-CL-LorenzoNeural
+     Gender: Male
+
+     Name: es-CO-GonzaloNeural
+     Gender: Male
+
+     Name: es-CO-SalomeNeural
+     Gender: Female
+
+     Name: es-CR-JuanNeural
+     Gender: Male
+
+     Name: es-CR-MariaNeural
+     Gender: Female
+
+     Name: es-CU-BelkysNeural
+     Gender: Female
+
+     Name: es-CU-ManuelNeural
+     Gender: Male
+
+     Name: es-DO-EmilioNeural
+     Gender: Male
+
+     Name: es-DO-RamonaNeural
+     Gender: Female
+
+     Name: es-EC-AndreaNeural
+     Gender: Female
+
+     Name: es-EC-LuisNeural
+     Gender: Male
+
+     Name: es-ES-AlvaroNeural
+     Gender: Male
+
+     Name: es-ES-ElviraNeural
+     Gender: Female
+
+     Name: es-ES-XimenaNeural
+     Gender: Female
+
+     Name: es-GQ-JavierNeural
+     Gender: Male
+
+     Name: es-GQ-TeresaNeural
+     Gender: Female
+
+     Name: es-GT-AndresNeural
+     Gender: Male
+
+     Name: es-GT-MartaNeural
+     Gender: Female
+
+     Name: es-HN-CarlosNeural
+     Gender: Male
+
+     Name: es-HN-KarlaNeural
+     Gender: Female
+
+     Name: es-MX-DaliaNeural
+     Gender: Female
+
+     Name: es-MX-JorgeNeural
+     Gender: Male
+
+     Name: es-NI-FedericoNeural
+     Gender: Male
+
+     Name: es-NI-YolandaNeural
+     Gender: Female
+
+     Name: es-PA-MargaritaNeural
+     Gender: Female
+
+     Name: es-PA-RobertoNeural
+     Gender: Male
+
+     Name: es-PE-AlexNeural
+     Gender: Male
+
+     Name: es-PE-CamilaNeural
+     Gender: Female
+
+     Name: es-PR-KarinaNeural
+     Gender: Female
+
+     Name: es-PR-VictorNeural
+     Gender: Male
+
+     Name: es-PY-MarioNeural
+     Gender: Male
+
+     Name: es-PY-TaniaNeural
+     Gender: Female
+
+     Name: es-SV-LorenaNeural
+     Gender: Female
+
+     Name: es-SV-RodrigoNeural
+     Gender: Male
+
+     Name: es-US-AlonsoNeural
+     Gender: Male
+
+     Name: es-US-PalomaNeural
+     Gender: Female
+
+     Name: es-UY-MateoNeural
+     Gender: Male
+
+     Name: es-UY-ValentinaNeural
+     Gender: Female
+
+     Name: es-VE-PaolaNeural
+     Gender: Female
+
+     Name: es-VE-SebastianNeural
+     Gender: Male
+
+     Name: et-EE-AnuNeural
+     Gender: Female
+
+     Name: et-EE-KertNeural
+     Gender: Male
+
+     Name: fa-IR-DilaraNeural
+     Gender: Female
+
+     Name: fa-IR-FaridNeural
+     Gender: Male
+
+     Name: fi-FI-HarriNeural
+     Gender: Male
+
+     Name: fi-FI-NooraNeural
+     Gender: Female
+
+     Name: fil-PH-AngeloNeural
+     Gender: Male
+
+     Name: fil-PH-BlessicaNeural
+     Gender: Female
+
+     Name: fr-BE-CharlineNeural
+     Gender: Female
+
+     Name: fr-BE-GerardNeural
+     Gender: Male
+
+     Name: fr-CA-AntoineNeural
+     Gender: Male
+
+     Name: fr-CA-JeanNeural
+     Gender: Male
+
+     Name: fr-CA-SylvieNeural
+     Gender: Female
+
+     Name: fr-CA-ThierryNeural
+     Gender: Male
+
+     Name: fr-CH-ArianeNeural
+     Gender: Female
+
+     Name: fr-CH-FabriceNeural
+     Gender: Male
+
+     Name: fr-FR-DeniseNeural
+     Gender: Female
+
+     Name: fr-FR-EloiseNeural
+     Gender: Female
+
+     Name: fr-FR-HenriNeural
+     Gender: Male
+
+     Name: fr-FR-RemyMultilingualNeural
+     Gender: Male
+
+     Name: fr-FR-VivienneMultilingualNeural
+     Gender: Female
+
+     Name: ga-IE-ColmNeural
+     Gender: Male
+
+     Name: ga-IE-OrlaNeural
+     Gender: Female
+
+     Name: gl-ES-RoiNeural
+     Gender: Male
+
+     Name: gl-ES-SabelaNeural
+     Gender: Female
+
+     Name: gu-IN-DhwaniNeural
+     Gender: Female
+
+     Name: gu-IN-NiranjanNeural
+     Gender: Male
+
+     Name: he-IL-AvriNeural
+     Gender: Male
+
+     Name: he-IL-HilaNeural
+     Gender: Female
+
+     Name: hi-IN-MadhurNeural
+     Gender: Male
+
+     Name: hi-IN-SwaraNeural
+     Gender: Female
+
+     Name: hr-HR-GabrijelaNeural
+     Gender: Female
+
+     Name: hr-HR-SreckoNeural
+     Gender: Male
+
+     Name: hu-HU-NoemiNeural
+     Gender: Female
+
+     Name: hu-HU-TamasNeural
+     Gender: Male
+
+     Name: id-ID-ArdiNeural
+     Gender: Male
+
+     Name: id-ID-GadisNeural
+     Gender: Female
+
+     Name: is-IS-GudrunNeural
+     Gender: Female
+
+     Name: is-IS-GunnarNeural
+     Gender: Male
+
+     Name: it-IT-DiegoNeural
+     Gender: Male
+
+     Name: it-IT-ElsaNeural
+     Gender: Female
+
+     Name: it-IT-GiuseppeMultilingualNeural
+     Gender: Male
+
+     Name: it-IT-IsabellaNeural
+     Gender: Female
+
+     Name: iu-Cans-CA-SiqiniqNeural
+     Gender: Female
+
+     Name: iu-Cans-CA-TaqqiqNeural
+     Gender: Male
+
+     Name: iu-Latn-CA-SiqiniqNeural
+     Gender: Female
+
+     Name: iu-Latn-CA-TaqqiqNeural
+     Gender: Male
+
+     Name: ja-JP-KeitaNeural
+     Gender: Male
+
+     Name: ja-JP-NanamiNeural
+     Gender: Female
+
+     Name: jv-ID-DimasNeural
+     Gender: Male
+
+     Name: jv-ID-SitiNeural
+     Gender: Female
+
+     Name: ka-GE-EkaNeural
+     Gender: Female
+
+     Name: ka-GE-GiorgiNeural
+     Gender: Male
+
+     Name: kk-KZ-AigulNeural
+     Gender: Female
+
+     Name: kk-KZ-DauletNeural
+     Gender: Male
+
+     Name: km-KH-PisethNeural
+     Gender: Male
+
+     Name: km-KH-SreymomNeural
+     Gender: Female
+
+     Name: kn-IN-GaganNeural
+     Gender: Male
+
+     Name: kn-IN-SapnaNeural
+     Gender: Female
+
+     Name: ko-KR-HyunsuMultilingualNeural
+     Gender: Male
+
+     Name: ko-KR-InJoonNeural
+     Gender: Male
+
+     Name: ko-KR-SunHiNeural
+     Gender: Female
+
+     Name: lo-LA-ChanthavongNeural
+     Gender: Male
+
+     Name: lo-LA-KeomanyNeural
+     Gender: Female
+
+     Name: lt-LT-LeonasNeural
+     Gender: Male
+
+     Name: lt-LT-OnaNeural
+     Gender: Female
+
+     Name: lv-LV-EveritaNeural
+     Gender: Female
+
+     Name: lv-LV-NilsNeural
+     Gender: Male
+
+     Name: mk-MK-AleksandarNeural
+     Gender: Male
+
+     Name: mk-MK-MarijaNeural
+     Gender: Female
+
+     Name: ml-IN-MidhunNeural
+     Gender: Male
+
+     Name: ml-IN-SobhanaNeural
+     Gender: Female
+
+     Name: mn-MN-BataaNeural
+     Gender: Male
+
+     Name: mn-MN-YesuiNeural
+     Gender: Female
+
+     Name: mr-IN-AarohiNeural
+     Gender: Female
+
+     Name: mr-IN-ManoharNeural
+     Gender: Male
+
+     Name: ms-MY-OsmanNeural
+     Gender: Male
+
+     Name: ms-MY-YasminNeural
+     Gender: Female
+
+     Name: mt-MT-GraceNeural
+     Gender: Female
+
+     Name: mt-MT-JosephNeural
+     Gender: Male
+
+     Name: my-MM-NilarNeural
+     Gender: Female
+
+     Name: my-MM-ThihaNeural
+     Gender: Male
+
+     Name: nb-NO-FinnNeural
+     Gender: Male
+
+     Name: nb-NO-PernilleNeural
+     Gender: Female
+
+     Name: ne-NP-HemkalaNeural
+     Gender: Female
+
+     Name: ne-NP-SagarNeural
+     Gender: Male
+
+     Name: nl-BE-ArnaudNeural
+     Gender: Male
+
+     Name: nl-BE-DenaNeural
+     Gender: Female
+
+     Name: nl-NL-ColetteNeural
+     Gender: Female
+
+     Name: nl-NL-FennaNeural
+     Gender: Female
+
+     Name: nl-NL-MaartenNeural
+     Gender: Male
+
+     Name: pl-PL-MarekNeural
+     Gender: Male
+
+     Name: pl-PL-ZofiaNeural
+     Gender: Female
+
+     Name: ps-AF-GulNawazNeural
+     Gender: Male
+
+     Name: ps-AF-LatifaNeural
+     Gender: Female
+
+     Name: pt-BR-AntonioNeural
+     Gender: Male
+
+     Name: pt-BR-FranciscaNeural
+     Gender: Female
+
+     Name: pt-BR-ThalitaMultilingualNeural
+     Gender: Female
+
+     Name: pt-PT-DuarteNeural
+     Gender: Male
+
+     Name: pt-PT-RaquelNeural
+     Gender: Female
+
+     Name: ro-RO-AlinaNeural
+     Gender: Female
+
+     Name: ro-RO-EmilNeural
+     Gender: Male
+
+     Name: ru-RU-DmitryNeural
+     Gender: Male
+
+     Name: ru-RU-SvetlanaNeural
+     Gender: Female
+
+     Name: si-LK-SameeraNeural
+     Gender: Male
+
+     Name: si-LK-ThiliniNeural
+     Gender: Female
+
+     Name: sk-SK-LukasNeural
+     Gender: Male
+
+     Name: sk-SK-ViktoriaNeural
+     Gender: Female
+
+     Name: sl-SI-PetraNeural
+     Gender: Female
+
+     Name: sl-SI-RokNeural
+     Gender: Male
+
+     Name: so-SO-MuuseNeural
+     Gender: Male
+
+     Name: so-SO-UbaxNeural
+     Gender: Female
+
+     Name: sq-AL-AnilaNeural
+     Gender: Female
+
+     Name: sq-AL-IlirNeural
+     Gender: Male
+
+     Name: sr-RS-NicholasNeural
+     Gender: Male
+
+     Name: sr-RS-SophieNeural
+     Gender: Female
+
+     Name: su-ID-JajangNeural
+     Gender: Male
+
+     Name: su-ID-TutiNeural
+     Gender: Female
+
+     Name: sv-SE-MattiasNeural
+     Gender: Male
+
+     Name: sv-SE-SofieNeural
+     Gender: Female
+
+     Name: sw-KE-RafikiNeural
+     Gender: Male
+
+     Name: sw-KE-ZuriNeural
+     Gender: Female
+
+     Name: sw-TZ-DaudiNeural
+     Gender: Male
+
+     Name: sw-TZ-RehemaNeural
+     Gender: Female
+
+     Name: ta-IN-PallaviNeural
+     Gender: Female
+
+     Name: ta-IN-ValluvarNeural
+     Gender: Male
+
+     Name: ta-LK-KumarNeural
+     Gender: Male
+
+     Name: ta-LK-SaranyaNeural
+     Gender: Female
+
+     Name: ta-MY-KaniNeural
+     Gender: Female
+
+     Name: ta-MY-SuryaNeural
+     Gender: Male
+
+     Name: ta-SG-AnbuNeural
+     Gender: Male
+
+     Name: ta-SG-VenbaNeural
+     Gender: Female
+
+     Name: te-IN-MohanNeural
+     Gender: Male
+
+     Name: te-IN-ShrutiNeural
+     Gender: Female
+
+     Name: th-TH-NiwatNeural
+     Gender: Male
+
+     Name: th-TH-PremwadeeNeural
+     Gender: Female
+
+     Name: tr-TR-AhmetNeural
+     Gender: Male
+
+     Name: tr-TR-EmelNeural
+     Gender: Female
+
+     Name: uk-UA-OstapNeural
+     Gender: Male
+
+     Name: uk-UA-PolinaNeural
+     Gender: Female
+
+     Name: ur-IN-GulNeural
+     Gender: Female
+
+     Name: ur-IN-SalmanNeural
+     Gender: Male
+
+     Name: ur-PK-AsadNeural
+     Gender: Male
+
+     Name: ur-PK-UzmaNeural
+     Gender: Female
+
+     Name: uz-UZ-MadinaNeural
+     Gender: Female
+
+     Name: uz-UZ-SardorNeural
+     Gender: Male
+
+     Name: vi-VN-HoaiMyNeural
+     Gender: Female
+
+     Name: vi-VN-NamMinhNeural
+     Gender: Male
+
+     Name: zh-CN-XiaoxiaoNeural
+     Gender: Female
+
+     Name: zh-CN-XiaoyiNeural
+     Gender: Female
+
+     Name: zh-CN-YunjianNeural
+     Gender: Male
+
+     Name: zh-CN-YunxiNeural
+     Gender: Male
+
+     Name: zh-CN-YunxiaNeural
+     Gender: Male
+
+     Name: zh-CN-YunyangNeural
+     Gender: Male
+
+     Name: zh-CN-liaoning-XiaobeiNeural
+     Gender: Female
+
+     Name: zh-CN-shaanxi-XiaoniNeural
+     Gender: Female
+
+     Name: zh-HK-HiuGaaiNeural
+     Gender: Female
+
+     Name: zh-HK-HiuMaanNeural
+     Gender: Female
+
+     Name: zh-HK-WanLungNeural
+     Gender: Male
+
+     Name: zh-TW-HsiaoChenNeural
+     Gender: Female
+
+     Name: zh-TW-HsiaoYuNeural
+     Gender: Female
+
+     Name: zh-TW-YunJheNeural
+     Gender: Male
+
+     Name: zu-ZA-ThandoNeural
+     Gender: Female
+
+     Name: zu-ZA-ThembaNeural
+     Gender: Male
+
+
+     Name: en-US-AvaMultilingualNeural-V2
+     Gender: Female
+
+     Name: en-US-AndrewMultilingualNeural-V2
+     Gender: Male
+
+     Name: en-US-EmmaMultilingualNeural-V2
+     Gender: Female
+
+     Name: en-US-BrianMultilingualNeural-V2
+     Gender: Male
+
+     Name: de-DE-FlorianMultilingualNeural-V2
+     Gender: Male
+
+     Name: de-DE-SeraphinaMultilingualNeural-V2
+     Gender: Female
+
+     Name: fr-FR-RemyMultilingualNeural-V2
+     Gender: Male
+
+     Name: fr-FR-VivienneMultilingualNeural-V2
+     Gender: Female
+
+     Name: zh-CN-XiaoxiaoMultilingualNeural-V2
+     Gender: Female
+     """.strip()
+     voices = []
+     # Regular expression matching consecutive "Name:" and "Gender:" lines
+     pattern = re.compile(r"Name:\s*(.+)\s*Gender:\s*(.+)\s*", re.MULTILINE)
+     # Find all matches
+     matches = pattern.findall(azure_voices_str)
+
+     for name, gender in matches:
+         # Apply the locale filter, if any
+         if filter_locals and any(
+             name.lower().startswith(fl.lower()) for fl in filter_locals
+         ):
+             voices.append(f"{name}-{gender}")
+         elif not filter_locals:
+             voices.append(f"{name}-{gender}")
+
+     voices.sort()
+     return voices
+
+
+ def parse_voice_name(name: str):
+     # zh-CN-XiaoyiNeural-Female
+     # zh-CN-YunxiNeural-Male
+     # zh-CN-XiaoxiaoMultilingualNeural-V2-Female
+     name = name.replace("-Female", "").replace("-Male", "").strip()
+     return name
+
+
+ def is_azure_v2_voice(voice_name: str):
+     voice_name = parse_voice_name(voice_name)
+     if voice_name.endswith("-V2"):
+         return voice_name.replace("-V2", "").strip()
+     return ""
+
+
+ def is_siliconflow_voice(voice_name: str):
+     """Check whether the voice name refers to a SiliconFlow voice."""
+     return voice_name.startswith("siliconflow:")
+
+
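+ # Quick reference (illustrative examples) for the three helpers above:
+ #   parse_voice_name("zh-CN-XiaoyiNeural-Female")               -> "zh-CN-XiaoyiNeural"
+ #   is_azure_v2_voice("en-US-AvaMultilingualNeural-V2-Female")  -> "en-US-AvaMultilingualNeural"
+ #   is_azure_v2_voice("en-US-AvaNeural-Female")                 -> ""  (falsy: not a V2 voice)
+ #   is_siliconflow_voice("siliconflow:FunAudioLLM/CosyVoice2-0.5B:alex-Male") -> True
+
+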
+ def tts(
+     text: str,
+     voice_name: str,
+     voice_rate: float,
+     voice_file: str,
+     voice_volume: float = 1.0,
+ ) -> Union[SubMaker, None]:
+     if is_azure_v2_voice(voice_name):
+         return azure_tts_v2(text, voice_name, voice_file)
+     elif is_siliconflow_voice(voice_name):
+         # Extract the model and voice from voice_name.
+         # Format: siliconflow:model:voice-Gender
+         parts = voice_name.split(":")
+         if len(parts) >= 3:
+             model = parts[1]
+             # Remove the gender suffix, e.g. "alex-Male" -> "alex"
+             voice_with_gender = parts[2]
+             voice = voice_with_gender.split("-")[0]
+             # Build the full voice parameter in "model:voice" format
+             full_voice = f"{model}:{voice}"
+             return siliconflow_tts(
+                 text, model, full_voice, voice_rate, voice_file, voice_volume
+             )
+         else:
+             logger.error(f"Invalid siliconflow voice name format: {voice_name}")
+             return None
+     return azure_tts_v1(text, voice_name, voice_rate, voice_file)
+
+
+ def convert_rate_to_percent(rate: float) -> str:
+     if rate == 1.0:
+         return "+0%"
+     percent = round((rate - 1.0) * 100)
+     if percent > 0:
+         return f"+{percent}%"
+     else:
+         return f"{percent}%"
+
+
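+ # Examples (illustrative): convert_rate_to_percent maps a speed multiplier
+ # to the percent string edge-tts expects:
+ #   1.0 -> "+0%"    1.2 -> "+20%"    0.8 -> "-20%"    1.25 -> "+25%"
+
+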
+ def azure_tts_v1(
+     text: str, voice_name: str, voice_rate: float, voice_file: str
+ ) -> Union[SubMaker, None]:
+     voice_name = parse_voice_name(voice_name)
+     text = text.strip()
+     rate_str = convert_rate_to_percent(voice_rate)
+     for i in range(3):
+         try:
+             logger.info(f"start, voice name: {voice_name}, try: {i + 1}")
+
+             async def _do() -> SubMaker:
+                 communicate = edge_tts.Communicate(text, voice_name, rate=rate_str)
+                 sub_maker = edge_tts.SubMaker()
+                 with open(voice_file, "wb") as file:
+                     async for chunk in communicate.stream():
+                         if chunk["type"] == "audio":
+                             file.write(chunk["data"])
+                         elif chunk["type"] == "WordBoundary":
+                             sub_maker.create_sub(
+                                 (chunk["offset"], chunk["duration"]), chunk["text"]
+                             )
+                 return sub_maker
+
+             sub_maker = asyncio.run(_do())
+             if not sub_maker or not sub_maker.subs:
+                 logger.warning("failed, sub_maker is None or sub_maker.subs is None")
+                 continue
+
+             logger.info(f"completed, output file: {voice_file}")
+             return sub_maker
+         except Exception as e:
+             logger.error(f"failed, error: {str(e)}")
+     return None
+
+
+ def siliconflow_tts(
+     text: str,
+     model: str,
+     voice: str,
+     voice_rate: float,
+     voice_file: str,
+     voice_volume: float = 1.0,
+ ) -> Union[SubMaker, None]:
+     """
+     Generate speech via the SiliconFlow API.
+
+     Args:
+         text: Text to convert to speech
+         model: Model name, e.g. "FunAudioLLM/CosyVoice2-0.5B"
+         voice: Voice name, e.g. "FunAudioLLM/CosyVoice2-0.5B:alex"
+         voice_rate: Speech speed, in the range [0.25, 4.0]
+         voice_file: Path of the output audio file
+         voice_volume: Speech volume, in the range [0.6, 5.0]; converted to
+             SiliconFlow's gain range of [-10, 10]
+
+     Returns:
+         A SubMaker object, or None on failure
+     """
+     text = text.strip()
+     api_key = config.siliconflow.get("api_key", "")
+
+     if not api_key:
+         logger.error("SiliconFlow API key is not set")
+         return None
+
+     # Convert voice_volume to SiliconFlow's gain range.
+     # The default voice_volume of 1.0 corresponds to a gain of 0.
+     gain = voice_volume - 1.0
+     # Clamp the gain to [-10, 10]
+     gain = max(-10, min(10, gain))
+
+     url = "https://api.siliconflow.cn/v1/audio/speech"
+
+     payload = {
+         "model": model,
+         "input": text,
+         "voice": voice,
+         "response_format": "mp3",
+         "sample_rate": 32000,
+         "stream": False,
+         "speed": voice_rate,
+         "gain": gain,
+     }
+
+     headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}
+
+     for i in range(3):  # up to 3 attempts
+         try:
+             logger.info(
+                 f"start siliconflow tts, model: {model}, voice: {voice}, try: {i + 1}"
+             )
+
+             response = requests.post(url, json=payload, headers=headers)
+
+             if response.status_code == 200:
+                 # Save the audio file
+                 with open(voice_file, "wb") as f:
+                     f.write(response.content)
+
+                 # Create an empty SubMaker object
+                 sub_maker = SubMaker()
+
+                 # Determine the actual duration of the audio file
+                 try:
+                     # Try to get the audio duration with moviepy
+                     from moviepy import AudioFileClip
+
+                     audio_clip = AudioFileClip(voice_file)
+                     audio_duration = audio_clip.duration
+                     audio_clip.close()
+
+                     # Convert the duration to 100 ns units (edge_tts compatible)
+                     audio_duration_100ns = int(audio_duration * 10000000)
+
+                     # Split the text at punctuation marks to create more
+                     # accurate, sentence-level subtitles
+                     sentences = utils.split_string_by_punctuations(text)
+
+                     if sentences:
+                         # Estimate each sentence's duration, proportional to
+                         # its character count
+                         total_chars = sum(len(s) for s in sentences)
+                         char_duration = (
+                             audio_duration_100ns / total_chars if total_chars > 0 else 0
+                         )
+
+                         current_offset = 0
+                         for sentence in sentences:
+                             if not sentence.strip():
+                                 continue
+
+                             # Duration of the current sentence
+                             sentence_chars = len(sentence)
+                             sentence_duration = int(sentence_chars * char_duration)
+
+                             # Append to the SubMaker
+                             sub_maker.subs.append(sentence)
+                             sub_maker.offset.append(
+                                 (current_offset, current_offset + sentence_duration)
+                             )
+
+                             # Advance the offset
+                             current_offset += sentence_duration
+                     else:
+                         # If the text cannot be split, use it as a single subtitle
+                         sub_maker.subs = [text]
+                         sub_maker.offset = [(0, audio_duration_100ns)]
+
+                 except Exception as e:
+                     logger.warning(f"Failed to create accurate subtitles: {str(e)}")
+                     # Fall back to a single subtitle covering the whole clip
+                     sub_maker.subs = [text]
+                     # Use the actual audio duration if available; otherwise assume 10 s
+                     sub_maker.offset = [
+                         (
+                             0,
+                             audio_duration_100ns
+                             if "audio_duration_100ns" in locals()
+                             else 100000000,
+                         )
+                     ]
+
+                 logger.success(f"siliconflow tts succeeded: {voice_file}")
+                 return sub_maker
+             else:
+                 logger.error(
+                     f"siliconflow tts failed with status code {response.status_code}: {response.text}"
+                 )
+         except Exception as e:
+             logger.error(f"siliconflow tts failed: {str(e)}")
+
+     return None
+
+
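+ # Worked example (illustrative) of the character-proportional timing above:
+ # a 10 s clip is 100_000_000 units of 100 ns; with two sentences of 30 and
+ # 10 characters, char_duration = 100_000_000 / 40 = 2_500_000, so the
+ # subtitles get the spans (0, 75_000_000) and (75_000_000, 100_000_000),
+ # i.e. 7.5 s and 2.5 s.
+
+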
+ def azure_tts_v2(text: str, voice_name: str, voice_file: str) -> Union[SubMaker, None]:
+     parsed_voice_name = is_azure_v2_voice(voice_name)
+     if not parsed_voice_name:
+         logger.error(f"invalid voice name: {voice_name}")
+         raise ValueError(f"invalid voice name: {voice_name}")
+     voice_name = parsed_voice_name
+     text = text.strip()
+
+     def _format_duration_to_offset(duration) -> int:
+         if isinstance(duration, str):
+             time_obj = datetime.strptime(duration, "%H:%M:%S.%f")
+             milliseconds = (
+                 (time_obj.hour * 3600000)
+                 + (time_obj.minute * 60000)
+                 + (time_obj.second * 1000)
+                 + (time_obj.microsecond // 1000)
+             )
+             return milliseconds * 10000
+
+         if isinstance(duration, int):
+             return duration
+
+         return 0
+
+     for i in range(3):
+         try:
+             logger.info(f"start, voice name: {voice_name}, try: {i + 1}")
+
+             import azure.cognitiveservices.speech as speechsdk
+
+             sub_maker = SubMaker()
+
+             def speech_synthesizer_word_boundary_cb(evt: speechsdk.SessionEventArgs):
+                 # Record each word boundary as a (start, end) offset pair in
+                 # 100 ns units, matching the edge-tts SubMaker convention
+                 duration = _format_duration_to_offset(str(evt.duration))
+                 offset = _format_duration_to_offset(evt.audio_offset)
+                 sub_maker.subs.append(evt.text)
+                 sub_maker.offset.append((offset, offset + duration))
+
+             # Create a speech config with the configured subscription key and region
+             speech_key = config.azure.get("speech_key", "")
+             service_region = config.azure.get("speech_region", "")
+             if not speech_key or not service_region:
+                 logger.error("Azure speech key or region is not set")
+                 return None
+
+             audio_config = speechsdk.audio.AudioOutputConfig(
+                 filename=voice_file, use_default_speaker=True
+             )
+             speech_config = speechsdk.SpeechConfig(
+                 subscription=speech_key, region=service_region
+             )
+             speech_config.speech_synthesis_voice_name = voice_name
+             # speech_config.set_property(
+             #     property_id=speechsdk.PropertyId.SpeechServiceResponse_RequestSentenceBoundary,
+             #     value="true",
+             # )
+             speech_config.set_property(
+                 property_id=speechsdk.PropertyId.SpeechServiceResponse_RequestWordBoundary,
+                 value="true",
+             )
+
+             speech_config.set_speech_synthesis_output_format(
+                 speechsdk.SpeechSynthesisOutputFormat.Audio48Khz192KBitRateMonoMp3
+             )
+             speech_synthesizer = speechsdk.SpeechSynthesizer(
+                 audio_config=audio_config, speech_config=speech_config
+             )
+             speech_synthesizer.synthesis_word_boundary.connect(
+                 speech_synthesizer_word_boundary_cb
+             )
+
+             result = speech_synthesizer.speak_text_async(text).get()
+             if result.reason == speechsdk.ResultReason.SynthesizingAudioCompleted:
+                 logger.success(f"azure v2 speech synthesis succeeded: {voice_file}")
+                 return sub_maker
+             elif result.reason == speechsdk.ResultReason.Canceled:
+                 cancellation_details = result.cancellation_details
+                 logger.error(
+                     f"azure v2 speech synthesis canceled: {cancellation_details.reason}"
+                 )
+                 if cancellation_details.reason == speechsdk.CancellationReason.Error:
+                     logger.error(
+                         f"azure v2 speech synthesis error: {cancellation_details.error_details}"
+                     )
+         except Exception as e:
+             logger.error(f"failed, error: {str(e)}")
+     return None
+
+
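+ # Example (illustrative): _format_duration_to_offset("0:00:01.500000")
+ # returns 1_500 ms * 10_000 = 15_000_000, matching the 100 ns unit
+ # convention used by edge-tts SubMaker offsets.
+
+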
+ def _format_text(text: str) -> str:
+     # text = text.replace("\n", " ")
+     text = text.replace("[", " ")
+     text = text.replace("]", " ")
+     text = text.replace("(", " ")
+     text = text.replace(")", " ")
+     text = text.replace("{", " ")
+     text = text.replace("}", " ")
+     text = text.strip()
+     return text
+
+
+ def create_subtitle(sub_maker: submaker.SubMaker, text: str, subtitle_file: str):
+     """
+     Optimize the subtitle file:
+     1. Split the script into lines at punctuation marks
+     2. Match each line against the text accumulated from the SubMaker
+     3. Generate a new subtitle file
+     """
+
+     text = _format_text(text)
+
+     def formatter(idx: int, start_time: float, end_time: float, sub_text: str) -> str:
+         """
+         1
+         00:00:00,000 --> 00:00:02,360
+         跑步是一项简单易行的运动
+         """
+         start_t = mktimestamp(start_time).replace(".", ",")
+         end_t = mktimestamp(end_time).replace(".", ",")
+         return f"{idx}\n{start_t} --> {end_t}\n{sub_text}\n"
+
+     start_time = -1.0
+     sub_items = []
+     sub_index = 0
+
+     script_lines = utils.split_string_by_punctuations(text)
+
+     def match_line(_sub_line: str, _sub_index: int):
+         if len(script_lines) <= _sub_index:
+             return ""
+
+         _line = script_lines[_sub_index]
+         if _sub_line == _line:
+             return script_lines[_sub_index].strip()
+
+         _sub_line_ = re.sub(r"[^\w\s]", "", _sub_line)
+         _line_ = re.sub(r"[^\w\s]", "", _line)
+         if _sub_line_ == _line_:
+             return _line_.strip()
+
+         _sub_line_ = re.sub(r"\W+", "", _sub_line)
+         _line_ = re.sub(r"\W+", "", _line)
+         if _sub_line_ == _line_:
+             return _line.strip()
+
+         return ""
+
+     sub_line = ""
+
+     try:
+         for _, (offset, sub) in enumerate(zip(sub_maker.offset, sub_maker.subs)):
+             _start_time, end_time = offset
+             if start_time < 0:
+                 start_time = _start_time
+
+             sub = unescape(sub)
+             sub_line += sub
+             sub_text = match_line(sub_line, sub_index)
+             if sub_text:
+                 sub_index += 1
+                 line = formatter(
+                     idx=sub_index,
+                     start_time=start_time,
+                     end_time=end_time,
+                     sub_text=sub_text,
+                 )
+                 sub_items.append(line)
+                 start_time = -1.0
+                 sub_line = ""
+
+         if len(sub_items) == len(script_lines):
+             with open(subtitle_file, "w", encoding="utf-8") as file:
+                 file.write("\n".join(sub_items) + "\n")
+             try:
+                 sbs = subtitles.file_to_subtitles(subtitle_file, encoding="utf-8")
+                 duration = max([tb for ((ta, tb), txt) in sbs])
+                 logger.info(
+                     f"completed, subtitle file created: {subtitle_file}, duration: {duration}"
+                 )
+             except Exception as e:
+                 logger.error(f"failed, error: {str(e)}")
+                 os.remove(subtitle_file)
+         else:
+             logger.warning(
+                 f"failed, sub_items len: {len(sub_items)}, script_lines len: {len(script_lines)}"
+             )
+
+     except Exception as e:
+         logger.error(f"failed, error: {str(e)}")
+
+
+ def get_audio_duration(sub_maker: submaker.SubMaker):
+     """
+     Get the audio duration in seconds.
+     """
+     if not sub_maker.offset:
+         return 0.0
+     return sub_maker.offset[-1][1] / 10000000
+
+
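+ # Example (illustrative): offsets are stored in 100 ns units, so a final
+ # offset end of 125_000_000 corresponds to 125_000_000 / 10_000_000 = 12.5 s.
+
+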
+ if __name__ == "__main__":
+     voice_name = "zh-CN-XiaoxiaoMultilingualNeural-V2-Female"
+     voice_name = parse_voice_name(voice_name)
+     voice_name = is_azure_v2_voice(voice_name)
+     print(voice_name)
+
+     voices = get_all_azure_voices()
+     print(len(voices))
+
+     async def _do():
+         temp_dir = utils.storage_dir("temp")
+
+         voice_names = [
+             "zh-CN-XiaoxiaoMultilingualNeural",
+             # Female voices
+             "zh-CN-XiaoxiaoNeural",
+             "zh-CN-XiaoyiNeural",
+             # Male voices
+             "zh-CN-YunyangNeural",
+             "zh-CN-YunxiNeural",
+         ]
+         text = """
+         静夜思是唐代诗人李白创作的一首五言古诗。这首诗描绘了诗人在寂静的夜晚,看到窗前的明月,不禁想起远方的家乡和亲人,表达了他对家乡和亲人的深深思念之情。全诗内容是:“床前明月光,疑是地上霜。举头望明月,低头思故乡。”在这短短的四句诗中,诗人通过“明月”和“思故乡”的意象,巧妙地表达了离乡背井人的孤独与哀愁。首句“床前明月光”设景立意,通过明亮的月光引出诗人的遐想;“疑是地上霜”增添了夜晚的寒冷感,加深了诗人的孤寂之情;“举头望明月”和“低头思故乡”则是情感的升华,展现了诗人内心深处的乡愁和对家的渴望。这首诗简洁明快,情感真挚,是中国古典诗歌中非常著名的一首,也深受后人喜爱和推崇。
+         """
+
+         text = """
+         What is the meaning of life? This question has puzzled philosophers, scientists, and thinkers of all kinds for centuries. Throughout history, various cultures and individuals have come up with their interpretations and beliefs around the purpose of life. Some say it's to seek happiness and self-fulfillment, while others believe it's about contributing to the welfare of others and making a positive impact in the world. Despite the myriad of perspectives, one thing remains clear: the meaning of life is a deeply personal concept that varies from one person to another. It's an existential inquiry that encourages us to reflect on our values, desires, and the essence of our existence.
+         """
+
+         text = """
+         预计未来3天深圳冷空气活动频繁,未来两天持续阴天有小雨,出门带好雨具;
+         10-11日持续阴天有小雨,日温差小,气温在13-17℃之间,体感阴凉;
+         12日天气短暂好转,早晚清凉;
+         """
+
+         text = "[Opening scene: A sunny day in a suburban neighborhood. A young boy named Alex, around 8 years old, is playing in his front yard with his loyal dog, Buddy.]\n\n[Camera zooms in on Alex as he throws a ball for Buddy to fetch. Buddy excitedly runs after it and brings it back to Alex.]\n\nAlex: Good boy, Buddy! You're the best dog ever!\n\n[Buddy barks happily and wags his tail.]\n\n[As Alex and Buddy continue playing, a series of potential dangers loom nearby, such as a stray dog approaching, a ball rolling towards the street, and a suspicious-looking stranger walking by.]\n\nAlex: Uh oh, Buddy, look out!\n\n[Buddy senses the danger and immediately springs into action. He barks loudly at the stray dog, scaring it away. Then, he rushes to retrieve the ball before it reaches the street and gently nudges it back towards Alex. Finally, he stands protectively between Alex and the stranger, growling softly to warn them away.]\n\nAlex: Wow, Buddy, you're like my superhero!\n\n[Just as Alex and Buddy are about to head inside, they hear a loud crash from a nearby construction site. They rush over to investigate and find a pile of rubble blocking the path of a kitten trapped underneath.]\n\nAlex: Oh no, Buddy, we have to help!\n\n[Buddy barks in agreement and together they work to carefully move the rubble aside, allowing the kitten to escape unharmed. The kitten gratefully nuzzles against Buddy, who responds with a friendly lick.]\n\nAlex: We did it, Buddy! We saved the day again!\n\n[As Alex and Buddy walk home together, the sun begins to set, casting a warm glow over the neighborhood.]\n\nAlex: Thanks for always being there to watch over me, Buddy. You're not just my dog, you're my best friend.\n\n[Buddy barks happily and nuzzles against Alex as they disappear into the sunset, ready to face whatever adventures tomorrow may bring.]\n\n[End scene.]"
+
+         text = "大家好,我是乔哥,一个想帮你把信用卡全部还清的家伙!\n今天我们要聊的是信用卡的取现功能。\n你是不是也曾经因为一时的资金紧张,而拿着信用卡到ATM机取现?如果是,那你得好好看看这个视频了。\n现在都2024年了,我以为现在不会再有人用信用卡取现功能了。前几天一个粉丝发来一张图片,取现1万。\n信用卡取现有三个弊端。\n一,信用卡取现功能代价可不小。会先收取一个取现手续费,比如这个粉丝,取现1万,按2.5%收取手续费,收取了250元。\n二,信用卡正常消费有最长56天的免息期,但取现不享受免息期。从取现那一天开始,每天按照万5收取利息,这个粉丝用了11天,收取了55元利息。\n三,频繁的取现行为,银行会认为你资金紧张,会被标记为高风险用户,影响你的综合评分和额度。\n那么,如果你资金紧张了,该怎么办呢?\n乔哥给你支一招,用破思机摩擦信用卡,只需要少量的手续费,而且还可以享受最长56天的免息期。\n最后,如果你对玩卡感兴趣,可以找乔哥领取一本《卡神秘籍》,用卡过程中遇到任何疑惑,也欢迎找乔哥交流。\n别忘了,关注乔哥,回复用卡技巧,免费领取《2024用卡技巧》,让我们一起成为用卡高手!"
+
+         text = """
+         2023全年业绩速览
+         公司全年累计实现营业收入1476.94亿元,同比增长19.01%,归母净利润747.34亿元,同比增长19.16%。EPS达到59.49元。第四季度单季,营业收入444.25亿元,同比增长20.26%,环比增长31.86%;归母净利润218.58亿元,同比增长19.33%,环比增长29.37%。这一阶段
+         的业绩表现不仅突显了公司的增长动力和盈利能力,也反映出公司在竞争激烈的市场环境中保持了良好的发展势头。
+         2023年Q4业绩速览
+         第四季度,营业收入贡献主要增长点;销售费用高增致盈利能力承压;税金同比上升27%,扰动净利率表现。
+         业绩解读
+         利润方面,2023全年贵州茅台,归母净利润增速为19%,其中营业收入正贡献18%,营业成本正贡献百分之一,管理费用正贡献百分之一点四。(注:归母净利润增速值=营业收入增速+各科目贡献,展示贡献/拖累的前四名科目,且要求贡献值/净利润增速>15%)
+         """
+         text = "静夜思是唐代诗人李白创作的一首五言古诗。这首诗描绘了诗人在寂静的夜晚,看到窗前的明月,不禁想起远方的家乡和亲人"
+
+         text = _format_text(text)
+         lines = utils.split_string_by_punctuations(text)
+         print(lines)
+
+         for voice_name in voice_names:
+             voice_file = f"{temp_dir}/tts-{voice_name}.mp3"
+             subtitle_file = f"{temp_dir}/tts.mp3.srt"
+             sub_maker = azure_tts_v2(
+                 text=text, voice_name=voice_name, voice_file=voice_file
+             )
+             create_subtitle(sub_maker=sub_maker, text=text, subtitle_file=subtitle_file)
+             audio_duration = get_audio_duration(sub_maker)
+             print(f"voice: {voice_name}, audio duration: {audio_duration}s")
+
+     loop = asyncio.get_event_loop_policy().get_event_loop()
+     try:
+         loop.run_until_complete(_do())
+     finally:
+         loop.close()
app/utils/utils.py ADDED
@@ -0,0 +1,230 @@
+ import json
+ import locale
+ import os
+ from pathlib import Path
+ import threading
+ from typing import Any
+ from uuid import uuid4
+
+ import urllib3
+ from loguru import logger
+
+ from app.models import const
+
+ urllib3.disable_warnings()
+
+
+ def get_response(status: int, data: Any = None, message: str = ""):
+     obj = {
+         "status": status,
+     }
+     if data:
+         obj["data"] = data
+     if message:
+         obj["message"] = message
+     return obj
+
+
+ def to_json(obj):
+     try:
+         # Define a helper function to handle different types of objects
+         def serialize(o):
+             # If the object is a serializable type, return it directly
+             if isinstance(o, (int, float, bool, str)) or o is None:
+                 return o
+             # If the object is binary data, replace it with a placeholder string
+             elif isinstance(o, bytes):
+                 return "*** binary data ***"
+             # If the object is a dictionary, recursively process each key-value pair
+             elif isinstance(o, dict):
+                 return {k: serialize(v) for k, v in o.items()}
+             # If the object is a list or tuple, recursively process each element
+             elif isinstance(o, (list, tuple)):
+                 return [serialize(item) for item in o]
+             # If the object is a custom type, attempt to return its __dict__ attribute
+             elif hasattr(o, "__dict__"):
+                 return serialize(o.__dict__)
+             # Return None for other cases (or choose to raise an exception)
+             else:
+                 return None
+
+         # Use the serialize function to process the input object
+         serialized_obj = serialize(obj)
+
+         # Serialize the processed object into a JSON string
+         return json.dumps(serialized_obj, ensure_ascii=False, indent=4)
+     except Exception:
+         return None
+
+
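+ # Example (illustrative): to_json degrades non-serializable values instead of
+ # raising, e.g. to_json({"id": 1, "blob": b"\x00"}) yields
+ # '{\n    "id": 1,\n    "blob": "*** binary data ***"\n}'.
+
+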
+ def get_uuid(remove_hyphen: bool = False):
+     u = str(uuid4())
+     if remove_hyphen:
+         u = u.replace("-", "")
+     return u
+
+
+ def root_dir():
+     return os.path.dirname(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))
+
+
+ def storage_dir(sub_dir: str = "", create: bool = False):
+     d = os.path.join(root_dir(), "storage")
+     if sub_dir:
+         d = os.path.join(d, sub_dir)
+     if create and not os.path.exists(d):
+         os.makedirs(d)
+
+     return d
+
+
+ def resource_dir(sub_dir: str = ""):
+     d = os.path.join(root_dir(), "resource")
+     if sub_dir:
+         d = os.path.join(d, sub_dir)
+     return d
+
+
+ def task_dir(sub_dir: str = ""):
+     d = os.path.join(storage_dir(), "tasks")
+     if sub_dir:
+         d = os.path.join(d, sub_dir)
+     if not os.path.exists(d):
+         os.makedirs(d)
+     return d
+
+
+ def font_dir(sub_dir: str = ""):
+     d = resource_dir("fonts")
+     if sub_dir:
+         d = os.path.join(d, sub_dir)
+     if not os.path.exists(d):
+         os.makedirs(d)
+     return d
+
+
+ def song_dir(sub_dir: str = ""):
+     d = resource_dir("songs")
+     if sub_dir:
+         d = os.path.join(d, sub_dir)
+     if not os.path.exists(d):
+         os.makedirs(d)
+     return d
+
+
+ def public_dir(sub_dir: str = ""):
+     d = resource_dir("public")
+     if sub_dir:
+         d = os.path.join(d, sub_dir)
+     if not os.path.exists(d):
+         os.makedirs(d)
+     return d
+
+
+ def run_in_background(func, *args, **kwargs):
+     def run():
+         try:
+             func(*args, **kwargs)
+         except Exception as e:
+             logger.error(f"run_in_background error: {e}")
+
+     thread = threading.Thread(target=run)
+     thread.start()
+     return thread
+
+
+ def time_convert_seconds_to_hmsm(seconds) -> str:
137
+ hours = int(seconds // 3600)
138
+ seconds = seconds % 3600
139
+ minutes = int(seconds // 60)
140
+ milliseconds = int(seconds * 1000) % 1000
141
+ seconds = int(seconds % 60)
142
+ return "{:02d}:{:02d}:{:02d},{:03d}".format(hours, minutes, seconds, milliseconds)
143
+
144
+
145
+ def text_to_srt(idx: int, msg: str, start_time: float, end_time: float) -> str:
146
+ start_time = time_convert_seconds_to_hmsm(start_time)
147
+ end_time = time_convert_seconds_to_hmsm(end_time)
148
+ srt = """%d
149
+ %s --> %s
150
+ %s
151
+ """ % (
152
+ idx,
153
+ start_time,
154
+ end_time,
155
+ msg,
156
+ )
157
+ return srt
158
+
159
+
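+ # Example (illustrative): text_to_srt(1, "hello world", 0.0, 2.36) returns
+ # "1\n00:00:00,000 --> 00:00:02,360\nhello world\n", a single SRT block.
+
+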
+ def str_contains_punctuation(word):
+     for p in const.PUNCTUATIONS:
+         if p in word:
+             return True
+     return False
+
+
+ def split_string_by_punctuations(s):
+     result = []
+     txt = ""
+
+     previous_char = ""
+     next_char = ""
+     for i in range(len(s)):
+         char = s[i]
+         if char == "\n":
+             result.append(txt.strip())
+             txt = ""
+             continue
+
+         if i > 0:
+             previous_char = s[i - 1]
+         if i < len(s) - 1:
+             next_char = s[i + 1]
+
+         if char == "." and previous_char.isdigit() and next_char.isdigit():
+             # In a case like "charged at a 2.5% fee", the dot in "2.5" should
+             # not be treated as a sentence break
+             txt += char
+             continue
+
+         if char not in const.PUNCTUATIONS:
+             txt += char
+         else:
+             result.append(txt.strip())
+             txt = ""
+     result.append(txt.strip())
+     # Filter out empty strings
+     result = list(filter(None, result))
+     return result
+
+
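+ # Example (illustrative, assuming "," and "。" are in const.PUNCTUATIONS):
+ # split_string_by_punctuations("床前明月光,疑是地上霜。") returns
+ # ["床前明月光", "疑是地上霜"]; a dot between digits, as in "2.5", is kept.
+
+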
+ def md5(text):
+     import hashlib
+
+     return hashlib.md5(text.encode("utf-8")).hexdigest()
+
+
+ def get_system_locale():
+     try:
+         loc = locale.getdefaultlocale()
+         # zh_CN, zh_TW return zh
+         # en_US, en_GB return en
+         language_code = loc[0].split("_")[0]
+         return language_code
+     except Exception:
+         return "en"
+
+
+ def load_locales(i18n_dir):
+     _locales = {}
+     for root, dirs, files in os.walk(i18n_dir):
+         for file in files:
+             if file.endswith(".json"):
+                 lang = file.split(".")[0]
+                 with open(os.path.join(root, file), "r", encoding="utf-8") as f:
+                     _locales[lang] = json.loads(f.read())
+     return _locales
+
+
+ def parse_extension(filename):
+     return Path(filename).suffix.lower().lstrip('.')
config.toml ADDED
@@ -0,0 +1,214 @@
+ [app]
+ video_source = "pexels" # "pexels" or "pixabay"
+
+ # Whether to hide the config panel
+ hide_config = false
+
+ # Pexels API Key
+ # Register at https://www.pexels.com/api/ to get your API key.
+ # You can use multiple keys to avoid rate limits.
+ # For example: pexels_api_keys = ["123adsf4567adf89","abd1321cd13efgfdfhi"]
+ # Note the format: wrap each key in double quotes and separate multiple keys with commas.
+ pexels_api_keys = []
+
+ # Pixabay API Key
+ # Register at https://pixabay.com/api/docs/ to get your API key.
+ # You can use multiple keys to avoid rate limits.
+ # For example: pixabay_api_keys = ["123adsf4567adf89","abd1321cd13efgfdfhi"]
+ # Note the format: wrap each key in double quotes and separate multiple keys with commas.
+ pixabay_api_keys = []
+
+ # Supported providers:
+ # openai
+ # moonshot (月之暗面)
+ # azure
+ # qwen (通义千问)
+ # deepseek
+ # gemini
+ # ollama
+ # g4f
+ # oneapi
+ # cloudflare
+ # ernie (文心一言)
+ llm_provider = "cloudflare"
+
+ ########## Pollinations AI Settings
+ # Visit https://pollinations.ai/ to learn more
+ # API Key is optional - leave empty for public access
+ pollinations_api_key = ""
+ # Default base URL for Pollinations API
+ pollinations_base_url = "https://pollinations.ai/api/v1"
+ # Default model for text generation
+ pollinations_model_name = "openai-fast"
+
+ ########## Ollama Settings
+ # No need to set it unless you want to use your own proxy
+ ollama_base_url = ""
+ # Check your available models at https://ollama.com/library
+ ollama_model_name = ""
+
+ ########## OpenAI API Key
+ # Get your API key at https://platform.openai.com/api-keys
+ openai_api_key = ""
+ # No need to set it unless you want to use your own proxy
+ openai_base_url = ""
+ # Check your available models at https://platform.openai.com/account/limits
+ openai_model_name = "gpt-4o-mini"
+
+ ########## Moonshot API Key
+ # Visit https://platform.moonshot.cn/console/api-keys to get your API key.
+ moonshot_api_key = ""
+ moonshot_base_url = "https://api.moonshot.cn/v1"
+ moonshot_model_name = "moonshot-v1-8k"
+
+ ########## OneAPI API Key
+ # Visit https://github.com/songquanpeng/one-api to get your API key
+ oneapi_api_key = ""
+ oneapi_base_url = ""
+ oneapi_model_name = ""
+
+ ########## G4F
+ # Visit https://github.com/xtekky/gpt4free to get more details
+ # Supported model list: https://github.com/xtekky/gpt4free/blob/main/g4f/models.py
+ g4f_model_name = "gpt-3.5-turbo"
+
+ ########## Azure API Key
+ # Visit https://learn.microsoft.com/zh-cn/azure/ai-services/openai/ to get more details
+ # API documentation: https://learn.microsoft.com/zh-cn/azure/ai-services/openai/reference
+ azure_api_key = ""
+ azure_base_url = ""
+ azure_model_name = "gpt-35-turbo" # replace with your model deployment name
+ azure_api_version = "2024-02-15-preview"
+
+ ########## Gemini API Key
+ gemini_api_key = ""
+ gemini_model_name = "gemini-1.0-pro"
+
+ ########## Qwen API Key
+ # Visit https://dashscope.console.aliyun.com/apiKey to get your API key
+ # Visit below links to get more details
+ # https://tongyi.aliyun.com/qianwen/
+ # https://help.aliyun.com/zh/dashscope/developer-reference/model-introduction
+ qwen_api_key = ""
+ qwen_model_name = "qwen-max"
+
+
+ ########## DeepSeek API Key
+ # Visit https://platform.deepseek.com/api_keys to get your API key
+ deepseek_api_key = ""
+ deepseek_base_url = "https://api.deepseek.com"
+ deepseek_model_name = "deepseek-chat"
+
+ # Subtitle Provider, "edge" or "whisper"
+ # If empty, the subtitle will not be generated
+ subtitle_provider = "edge"
+
+ #
+ # ImageMagick
+ #
+ # Once you have installed it, ImageMagick will be automatically detected, except on Windows!
+ # On Windows, for example "C:\Program Files (x86)\ImageMagick-7.1.1-Q16-HDRI\magick.exe"
+ # Download from https://imagemagick.org/archive/binaries/ImageMagick-7.1.1-29-Q16-x64-static.exe
+
+ # imagemagick_path = "C:\\Program Files (x86)\\ImageMagick-7.1.1-Q16\\magick.exe"
+
+
+ #
+ # FFMPEG
+ #
+ # Under normal circumstances, ffmpeg is downloaded automatically and detected automatically.
+ # However, if there is an issue with your environment that prevents automatic downloading, you might encounter the following error:
+ # RuntimeError: No ffmpeg exe could be found.
+ # Install ffmpeg on your system, or set the IMAGEIO_FFMPEG_EXE environment variable.
+ # In such cases, you can manually download ffmpeg and set the ffmpeg_path, download link: https://www.gyan.dev/ffmpeg/builds/
+
+ # ffmpeg_path = "C:\\Users\\harry\\Downloads\\ffmpeg.exe"
+ #########################################################################################
+
+ # When the video is successfully generated, the API service provides a download endpoint for the video, defaulting to the service's current address and listening port.
+ # For example, http://127.0.0.1:8080/tasks/6357f542-a4e1-46a1-b4c9-bf3bd0df5285/final-1.mp4
+ # If you need to provide the service externally using a domain name (usually done with nginx as a proxy), you can set it to your domain name.
+ # For example, https://xxxx.com/tasks/6357f542-a4e1-46a1-b4c9-bf3bd0df5285/final-1.mp4
+ # endpoint="https://xxxx.com"
+ endpoint = ""
+
+
+ # Video material storage location
+ # material_directory = "" # Indicates that video materials will be downloaded to the default folder, the default folder is ./storage/cache_videos under the current project
+ # material_directory = "/user/harry/videos" # Indicates that video materials will be downloaded to a specified folder
+ # material_directory = "task" # Indicates that video materials will be downloaded to the current task's folder, this method does not allow sharing of already downloaded video materials
+
+ material_directory = ""
+
+ # Used for state management of the task
+ enable_redis = false
+ redis_host = "localhost"
+ redis_port = 6379
+ redis_db = 0
+ redis_password = ""
+
+ # Maximum number of concurrent text-to-video tasks
+ max_concurrent_tasks = 5
+
+
+ [whisper]
+ # Only effective when subtitle_provider is "whisper"
+
+ # Run on GPU with FP16
+ # model = WhisperModel(model_size, device="cuda", compute_type="float16")
+
+ # Run on GPU with INT8
+ # model = WhisperModel(model_size, device="cuda", compute_type="int8_float16")
+
+ # Run on CPU with INT8
+ # model = WhisperModel(model_size, device="cpu", compute_type="int8")
+
+ # recommended model_size: "large-v3"
+ model_size = "large-v3"
+ # if you want to use GPU, set device="cuda"
+ device = "CPU"
+ compute_type = "int8"
+
+
+ [proxy]
+ ### Use a proxy to access the Pexels API
+ ### Format: "http://<username>:<password>@<proxy>:<port>"
+ ### Example: "http://user:pass@proxy:1234"
+ ### Doc: https://requests.readthedocs.io/en/latest/user/advanced/#proxies
+
+ # http = "http://10.10.1.10:3128"
+ # https = "http://10.10.1.10:1080"
+
+ [azure]
+ # Azure Speech API Key
+ # Get your API key at https://portal.azure.com/#view/Microsoft_Azure_ProjectOxford/CognitiveServicesHub/~/SpeechServices
+ speech_key = ""
+ speech_region = ""
+
+ [siliconflow]
+ # SiliconFlow API Key
+ # Get your API key at https://siliconflow.cn
+ api_key = ""
+
+ [ui]
+ # UI related settings
+ # Whether to hide logs in the UI
+ hide_log = false
requirements.txt ADDED
@@ -0,0 +1,16 @@
+ moviepy==2.1.2
+ streamlit==1.45.0
+ edge_tts==6.1.19
+ fastapi==0.115.6
+ uvicorn==0.32.1
+ openai==1.56.1
+ faster-whisper==1.1.0
+ loguru==0.7.3
+ google.generativeai==0.8.3
+ dashscope==1.20.14
+ g4f==0.5.2.2
+ azure-cognitiveservices-speech==1.41.1
+ redis==5.2.0
+ python-multipart==0.0.19
+ pyyaml
+ requests>=2.31.0