chaowenguo committed
Commit 1e7eefa (verified) · Parent(s): 293ab53

Upload 29 files
app/__init__.py ADDED
File without changes
app/asgi.py ADDED
@@ -0,0 +1,82 @@
"""Application implementation - ASGI."""

import os

from fastapi import FastAPI, Request
from fastapi.exceptions import RequestValidationError
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse
from fastapi.staticfiles import StaticFiles
from loguru import logger

from app.config import config
from app.models.exception import HttpException
from app.router import root_api_router
from app.utils import utils


def exception_handler(request: Request, e: HttpException):
    return JSONResponse(
        status_code=e.status_code,
        content=utils.get_response(e.status_code, e.data, e.message),
    )


def validation_exception_handler(request: Request, e: RequestValidationError):
    return JSONResponse(
        status_code=400,
        content=utils.get_response(
            status=400, data=e.errors(), message="field required"
        ),
    )


def get_application() -> FastAPI:
    """Initialize FastAPI application.

    Returns:
        FastAPI: Application object instance.

    """
    instance = FastAPI(
        title=config.project_name,
        description=config.project_description,
        version=config.project_version,
        debug=False,
    )
    instance.include_router(root_api_router)
    instance.add_exception_handler(HttpException, exception_handler)
    instance.add_exception_handler(RequestValidationError, validation_exception_handler)
    return instance


app = get_application()

# Configures the CORS middleware for the FastAPI app
cors_allowed_origins_str = os.getenv("CORS_ALLOWED_ORIGINS", "")
origins = cors_allowed_origins_str.split(",") if cors_allowed_origins_str else ["*"]
app.add_middleware(
    CORSMiddleware,
    allow_origins=origins,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

task_dir = utils.task_dir()
app.mount(
    "/tasks", StaticFiles(directory=task_dir, html=True, follow_symlink=True), name=""
)

public_dir = utils.public_dir()
app.mount("/", StaticFiles(directory=public_dir, html=True), name="")


@app.on_event("shutdown")
def shutdown_event():
    logger.info("shutdown event")


@app.on_event("startup")
def startup_event():
    logger.info("startup event")
app/config/__init__.py ADDED
@@ -0,0 +1,56 @@
import os
import sys

from loguru import logger

from app.config import config
from app.utils import utils


def __init_logger():
    # _log_file = utils.storage_dir("logs/server.log")
    _lvl = config.log_level
    root_dir = os.path.dirname(
        os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
    )

    def format_record(record):
        # Get the full file path from the log record
        file_path = record["file"].path
        # Convert the absolute path to a path relative to the project root
        relative_path = os.path.relpath(file_path, root_dir)
        # Update the file path in the record
        record["file"].path = f"./{relative_path}"
        # Return the modified format string; adjust it here as needed
        _format = (
            "<green>{time:%Y-%m-%d %H:%M:%S}</> | "
            + "<level>{level}</> | "
            + '"{file.path}:{line}":<blue> {function}</> '
            + "- <level>{message}</>"
            + "\n"
        )
        return _format

    logger.remove()

    logger.add(
        sys.stdout,
        level=_lvl,
        format=format_record,
        colorize=True,
    )

    # logger.add(
    #     _log_file,
    #     level=_lvl,
    #     format=format_record,
    #     rotation="00:00",
    #     retention="3 days",
    #     backtrace=True,
    #     diagnose=True,
    #     enqueue=True,
    # )


__init_logger()
app/config/config.py ADDED
@@ -0,0 +1,78 @@
import os
import shutil
import socket

import toml
from loguru import logger

root_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))
config_file = f"{root_dir}/config.toml"


def load_config():
    # fix: IsADirectoryError: [Errno 21] Is a directory: '/MoneyPrinterTurbo/config.toml'
    if os.path.isdir(config_file):
        shutil.rmtree(config_file)

    if not os.path.isfile(config_file):
        example_file = f"{root_dir}/config.example.toml"
        if os.path.isfile(example_file):
            shutil.copyfile(example_file, config_file)
            logger.info("copy config.example.toml to config.toml")

    logger.info(f"load config from file: {config_file}")

    try:
        _config_ = toml.load(config_file)
    except Exception as e:
        logger.warning(f"load config failed: {str(e)}, try to load as utf-8-sig")
        with open(config_file, mode="r", encoding="utf-8-sig") as fp:
            _cfg_content = fp.read()
            _config_ = toml.loads(_cfg_content)
    return _config_


def save_config():
    with open(config_file, "w", encoding="utf-8") as f:
        _cfg["app"] = app
        _cfg["azure"] = azure
        _cfg["siliconflow"] = siliconflow
        _cfg["ui"] = ui
        f.write(toml.dumps(_cfg))


_cfg = load_config()
app = _cfg.get("app", {})
whisper = _cfg.get("whisper", {})
proxy = _cfg.get("proxy", {})
azure = _cfg.get("azure", {})
siliconflow = _cfg.get("siliconflow", {})
ui = _cfg.get(
    "ui",
    {
        "hide_log": False,
    },
)

hostname = socket.gethostname()

log_level = _cfg.get("log_level", "DEBUG")
listen_host = _cfg.get("listen_host", "0.0.0.0")
listen_port = _cfg.get("listen_port", 8080)
project_name = _cfg.get("project_name", "MoneyPrinterTurbo")
project_description = _cfg.get(
    "project_description",
    "<a href='https://github.com/harry0703/MoneyPrinterTurbo'>https://github.com/harry0703/MoneyPrinterTurbo</a>",
)
project_version = _cfg.get("project_version", "1.2.6")
reload_debug = False

imagemagick_path = app.get("imagemagick_path", "")
if imagemagick_path and os.path.isfile(imagemagick_path):
    os.environ["IMAGEMAGICK_BINARY"] = imagemagick_path

ffmpeg_path = app.get("ffmpeg_path", "")
if ffmpeg_path and os.path.isfile(ffmpeg_path):
    os.environ["IMAGEIO_FFMPEG_EXE"] = ffmpeg_path

logger.info(f"{project_name} v{project_version}")
app/controllers/base.py ADDED
@@ -0,0 +1,31 @@
from uuid import uuid4

from fastapi import Request

from app.config import config
from app.models.exception import HttpException


def get_task_id(request: Request):
    task_id = request.headers.get("x-task-id")
    if not task_id:
        task_id = uuid4()
    return str(task_id)


def get_api_key(request: Request):
    api_key = request.headers.get("x-api-key")
    return api_key


def verify_token(request: Request):
    token = get_api_key(request)
    if token != config.app.get("api_key", ""):
        request_id = get_task_id(request)
        request_url = request.url
        user_agent = request.headers.get("user-agent")
        raise HttpException(
            task_id=request_id,
            status_code=401,
            message=f"invalid token: {request_url}, {user_agent}",
        )
app/controllers/manager/base_manager.py ADDED
@@ -0,0 +1,64 @@
import threading
from typing import Any, Callable, Dict


class TaskManager:
    def __init__(self, max_concurrent_tasks: int):
        self.max_concurrent_tasks = max_concurrent_tasks
        self.current_tasks = 0
        self.lock = threading.Lock()
        self.queue = self.create_queue()

    def create_queue(self):
        raise NotImplementedError()

    def add_task(self, func: Callable, *args: Any, **kwargs: Any):
        with self.lock:
            if self.current_tasks < self.max_concurrent_tasks:
                print(f"add task: {func.__name__}, current_tasks: {self.current_tasks}")
                self.execute_task(func, *args, **kwargs)
            else:
                print(
                    f"enqueue task: {func.__name__}, current_tasks: {self.current_tasks}"
                )
                self.enqueue({"func": func, "args": args, "kwargs": kwargs})

    def execute_task(self, func: Callable, *args: Any, **kwargs: Any):
        thread = threading.Thread(
            target=self.run_task, args=(func, *args), kwargs=kwargs
        )
        thread.start()

    def run_task(self, func: Callable, *args: Any, **kwargs: Any):
        try:
            with self.lock:
                self.current_tasks += 1
            func(*args, **kwargs)  # call the function here, passing *args and **kwargs.
        finally:
            self.task_done()

    def check_queue(self):
        with self.lock:
            if (
                self.current_tasks < self.max_concurrent_tasks
                and not self.is_queue_empty()
            ):
                task_info = self.dequeue()
                func = task_info["func"]
                args = task_info.get("args", ())
                kwargs = task_info.get("kwargs", {})
                self.execute_task(func, *args, **kwargs)

    def task_done(self):
        with self.lock:
            self.current_tasks -= 1
        self.check_queue()

    def enqueue(self, task: Dict):
        raise NotImplementedError()

    def dequeue(self):
        raise NotImplementedError()

    def is_queue_empty(self):
        raise NotImplementedError()
app/controllers/manager/memory_manager.py ADDED
@@ -0,0 +1,18 @@
from queue import Queue
from typing import Dict

from app.controllers.manager.base_manager import TaskManager


class InMemoryTaskManager(TaskManager):
    def create_queue(self):
        return Queue()

    def enqueue(self, task: Dict):
        self.queue.put(task)

    def dequeue(self):
        return self.queue.get()

    def is_queue_empty(self):
        return self.queue.empty()
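A minimal sketch of the queueing behavior (the `work` function and its sleep are made up for illustration): the first `max_concurrent_tasks` calls start threads immediately, the rest are parked in the `Queue` and drained by `check_queue()` as running tasks call `task_done()`.

# Sketch: InMemoryTaskManager concurrency limit (illustrative only).
import time

from app.controllers.manager.memory_manager import InMemoryTaskManager


def work(task_id: str):
    time.sleep(1)  # stand-in for real work
    print(f"done: {task_id}")


manager = InMemoryTaskManager(max_concurrent_tasks=2)
for n in range(5):
    manager.add_task(work, task_id=str(n))  # tasks 2-4 wait in the queue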
app/controllers/manager/redis_manager.py ADDED
@@ -0,0 +1,56 @@
import json
from typing import Dict

import redis

from app.controllers.manager.base_manager import TaskManager
from app.models.schema import VideoParams
from app.services import task as tm

FUNC_MAP = {
    "start": tm.start,
    # 'start_test': tm.start_test
}


class RedisTaskManager(TaskManager):
    def __init__(self, max_concurrent_tasks: int, redis_url: str):
        self.redis_client = redis.Redis.from_url(redis_url)
        super().__init__(max_concurrent_tasks)

    def create_queue(self):
        return "task_queue"

    def enqueue(self, task: Dict):
        task_with_serializable_params = task.copy()

        if "params" in task["kwargs"] and isinstance(
            task["kwargs"]["params"], VideoParams
        ):
            task_with_serializable_params["kwargs"]["params"] = task["kwargs"][
                "params"
            ].dict()

        # Convert the function object to its name
        task_with_serializable_params["func"] = task["func"].__name__
        self.redis_client.rpush(self.queue, json.dumps(task_with_serializable_params))

    def dequeue(self):
        task_json = self.redis_client.lpop(self.queue)
        if task_json:
            task_info = json.loads(task_json)
            # Convert the function name back to a function object
            task_info["func"] = FUNC_MAP[task_info["func"]]

            if "params" in task_info["kwargs"] and isinstance(
                task_info["kwargs"]["params"], dict
            ):
                task_info["kwargs"]["params"] = VideoParams(
                    **task_info["kwargs"]["params"]
                )

            return task_info
        return None

    def is_queue_empty(self):
        return self.redis_client.llen(self.queue) == 0
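The enqueue/dequeue pair above round-trips tasks through Redis: the function is stored by name (recovered via FUNC_MAP) and VideoParams is stored as a dict. A sketch of how it is wired up, mirroring app/controllers/v1/video.py; the URL credentials are placeholders:

# Sketch: RedisTaskManager wiring (placeholder redis_url).
from app.controllers.manager.redis_manager import RedisTaskManager
from app.models.schema import VideoParams
from app.services import task as tm

manager = RedisTaskManager(
    max_concurrent_tasks=1,
    redis_url="redis://:password@localhost:6379/0",
)
params = VideoParams(video_subject="spring flowers")
# Serialized on enqueue, rebuilt as VideoParams on dequeue.
manager.add_task(tm.start, task_id="demo", params=params, stop_at="video")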
app/controllers/ping.py ADDED
@@ -0,0 +1,13 @@
from fastapi import APIRouter, Request

router = APIRouter()


@router.get(
    "/ping",
    tags=["Health Check"],
    description="Check service availability",
    response_description="pong",
)
def ping(request: Request) -> str:
    return "pong"
app/controllers/v1/base.py ADDED
@@ -0,0 +1,11 @@
from fastapi import APIRouter


def new_router(dependencies=None):
    router = APIRouter()
    router.tags = ["V1"]
    router.prefix = "/api/v1"
    # Apply the auth dependencies to all routes
    if dependencies:
        router.dependencies = dependencies
    return router
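The `dependencies` hook is how the commented-out authentication line in llm.py and video.py is meant to be enabled; a sketch:

# Sketch: enable the x-api-key check from app/controllers/base.py on all v1 routes.
from fastapi import Depends

from app.controllers import base
from app.controllers.v1.base import new_router

router = new_router(dependencies=[Depends(base.verify_token)])
# Clients must then send the key configured as app.api_key in config.toml
# in an "x-api-key" header, or verify_token raises a 401 HttpException.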
app/controllers/v1/llm.py ADDED
@@ -0,0 +1,45 @@
from fastapi import Request

from app.controllers.v1.base import new_router
from app.models.schema import (
    VideoScriptRequest,
    VideoScriptResponse,
    VideoTermsRequest,
    VideoTermsResponse,
)
from app.services import llm
from app.utils import utils

# authentication dependency
# router = new_router(dependencies=[Depends(base.verify_token)])
router = new_router()


@router.post(
    "/scripts",
    response_model=VideoScriptResponse,
    summary="Create a script for the video",
)
def generate_video_script(request: Request, body: VideoScriptRequest):
    video_script = llm.generate_script(
        video_subject=body.video_subject,
        language=body.video_language,
        paragraph_number=body.paragraph_number,
    )
    response = {"video_script": video_script}
    return utils.get_response(200, response)


@router.post(
    "/terms",
    response_model=VideoTermsResponse,
    summary="Generate video terms based on the video script",
)
def generate_video_terms(request: Request, body: VideoTermsRequest):
    video_terms = llm.generate_terms(
        video_subject=body.video_subject,
        video_script=body.video_script,
        amount=body.amount,
    )
    response = {"video_terms": video_terms}
    return utils.get_response(200, response)
app/controllers/v1/video.py ADDED
@@ -0,0 +1,287 @@
import glob
import os
import pathlib
import shutil
from typing import Union

from fastapi import BackgroundTasks, Depends, Path, Query, Request, UploadFile
from fastapi.params import File
from fastapi.responses import FileResponse, StreamingResponse
from loguru import logger

from app.config import config
from app.controllers import base
from app.controllers.manager.memory_manager import InMemoryTaskManager
from app.controllers.manager.redis_manager import RedisTaskManager
from app.controllers.v1.base import new_router
from app.models.exception import HttpException
from app.models.schema import (
    AudioRequest,
    BgmRetrieveResponse,
    BgmUploadResponse,
    SubtitleRequest,
    TaskDeletionResponse,
    TaskQueryRequest,
    TaskQueryResponse,
    TaskResponse,
    TaskVideoRequest,
)
from app.services import state as sm
from app.services import task as tm
from app.utils import utils

# authentication dependency
# router = new_router(dependencies=[Depends(base.verify_token)])
router = new_router()

_enable_redis = config.app.get("enable_redis", False)
_redis_host = config.app.get("redis_host", "localhost")
_redis_port = config.app.get("redis_port", 6379)
_redis_db = config.app.get("redis_db", 0)
_redis_password = config.app.get("redis_password", None)
_max_concurrent_tasks = config.app.get("max_concurrent_tasks", 5)

redis_url = f"redis://:{_redis_password}@{_redis_host}:{_redis_port}/{_redis_db}"
# Choose the appropriate task manager based on the configuration
if _enable_redis:
    task_manager = RedisTaskManager(
        max_concurrent_tasks=_max_concurrent_tasks, redis_url=redis_url
    )
else:
    task_manager = InMemoryTaskManager(max_concurrent_tasks=_max_concurrent_tasks)


@router.post("/videos", response_model=TaskResponse, summary="Generate a short video")
def create_video(
    background_tasks: BackgroundTasks, request: Request, body: TaskVideoRequest
):
    return create_task(request, body, stop_at="video")


@router.post("/subtitle", response_model=TaskResponse, summary="Generate subtitle only")
def create_subtitle(
    background_tasks: BackgroundTasks, request: Request, body: SubtitleRequest
):
    return create_task(request, body, stop_at="subtitle")


@router.post("/audio", response_model=TaskResponse, summary="Generate audio only")
def create_audio(
    background_tasks: BackgroundTasks, request: Request, body: AudioRequest
):
    return create_task(request, body, stop_at="audio")


def create_task(
    request: Request,
    body: Union[TaskVideoRequest, SubtitleRequest, AudioRequest],
    stop_at: str,
):
    task_id = utils.get_uuid()
    request_id = base.get_task_id(request)
    try:
        task = {
            "task_id": task_id,
            "request_id": request_id,
            "params": body.model_dump(),
        }
        sm.state.update_task(task_id)
        task_manager.add_task(tm.start, task_id=task_id, params=body, stop_at=stop_at)
        logger.success(f"Task created: {utils.to_json(task)}")
        return utils.get_response(200, task)
    except ValueError as e:
        raise HttpException(
            task_id=task_id, status_code=400, message=f"{request_id}: {str(e)}"
        )


@router.get("/tasks", response_model=TaskQueryResponse, summary="Get all tasks")
def get_all_tasks(
    request: Request, page: int = Query(1, ge=1), page_size: int = Query(10, ge=1)
):
    request_id = base.get_task_id(request)
    tasks, total = sm.state.get_all_tasks(page, page_size)

    response = {
        "tasks": tasks,
        "total": total,
        "page": page,
        "page_size": page_size,
    }
    return utils.get_response(200, response)


@router.get(
    "/tasks/{task_id}", response_model=TaskQueryResponse, summary="Query task status"
)
def get_task(
    request: Request,
    task_id: str = Path(..., description="Task ID"),
    query: TaskQueryRequest = Depends(),
):
    endpoint = config.app.get("endpoint", "")
    if not endpoint:
        endpoint = str(request.base_url)
    endpoint = endpoint.rstrip("/")

    request_id = base.get_task_id(request)
    task = sm.state.get_task(task_id)
    if task:
        task_dir = utils.task_dir()

        def file_to_uri(file):
            if not file.startswith(endpoint):
                _uri_path = file.replace(task_dir, "tasks").replace("\\", "/")
                _uri_path = f"{endpoint}/{_uri_path}"
            else:
                _uri_path = file
            return _uri_path

        if "videos" in task:
            videos = task["videos"]
            urls = []
            for v in videos:
                urls.append(file_to_uri(v))
            task["videos"] = urls
        if "combined_videos" in task:
            combined_videos = task["combined_videos"]
            urls = []
            for v in combined_videos:
                urls.append(file_to_uri(v))
            task["combined_videos"] = urls
        return utils.get_response(200, task)

    raise HttpException(
        task_id=task_id, status_code=404, message=f"{request_id}: task not found"
    )


@router.delete(
    "/tasks/{task_id}",
    response_model=TaskDeletionResponse,
    summary="Delete a generated short video task",
)
def delete_video(request: Request, task_id: str = Path(..., description="Task ID")):
    request_id = base.get_task_id(request)
    task = sm.state.get_task(task_id)
    if task:
        tasks_dir = utils.task_dir()
        current_task_dir = os.path.join(tasks_dir, task_id)
        if os.path.exists(current_task_dir):
            shutil.rmtree(current_task_dir)

        sm.state.delete_task(task_id)
        logger.success(f"video deleted: {utils.to_json(task)}")
        return utils.get_response(200)

    raise HttpException(
        task_id=task_id, status_code=404, message=f"{request_id}: task not found"
    )


@router.get(
    "/musics", response_model=BgmRetrieveResponse, summary="Retrieve local BGM files"
)
def get_bgm_list(request: Request):
    suffix = "*.mp3"
    song_dir = utils.song_dir()
    files = glob.glob(os.path.join(song_dir, suffix))
    bgm_list = []
    for file in files:
        bgm_list.append(
            {
                "name": os.path.basename(file),
                "size": os.path.getsize(file),
                "file": file,
            }
        )
    response = {"files": bgm_list}
    return utils.get_response(200, response)


@router.post(
    "/musics",
    response_model=BgmUploadResponse,
    summary="Upload the BGM file to the songs directory",
)
def upload_bgm_file(request: Request, file: UploadFile = File(...)):
    request_id = base.get_task_id(request)
    # check file ext
    if file.filename.endswith("mp3"):
        song_dir = utils.song_dir()
        save_path = os.path.join(song_dir, file.filename)
        # save file
        with open(save_path, "wb+") as buffer:
            # If the file already exists, it will be overwritten
            file.file.seek(0)
            buffer.write(file.file.read())
        response = {"file": save_path}
        return utils.get_response(200, response)

    raise HttpException(
        "", status_code=400, message=f"{request_id}: Only *.mp3 files can be uploaded"
    )


@router.get("/stream/{file_path:path}")
async def stream_video(request: Request, file_path: str):
    tasks_dir = utils.task_dir()
    video_path = os.path.join(tasks_dir, file_path)
    range_header = request.headers.get("Range")
    video_size = os.path.getsize(video_path)
    start, end = 0, video_size - 1

    length = video_size
    if range_header:
        range_ = range_header.split("bytes=")[1]
        start, end = [int(part) if part else None for part in range_.split("-")]
        if start is None:
            start = video_size - end
            end = video_size - 1
        if end is None:
            end = video_size - 1
        length = end - start + 1

    def file_iterator(file_path, offset=0, bytes_to_read=None):
        with open(file_path, "rb") as f:
            f.seek(offset, os.SEEK_SET)
            remaining = bytes_to_read or video_size
            while remaining > 0:
                bytes_to_read = min(4096, remaining)
                data = f.read(bytes_to_read)
                if not data:
                    break
                remaining -= len(data)
                yield data

    response = StreamingResponse(
        file_iterator(video_path, start, length), media_type="video/mp4"
    )
    response.headers["Content-Range"] = f"bytes {start}-{end}/{video_size}"
    response.headers["Accept-Ranges"] = "bytes"
    response.headers["Content-Length"] = str(length)
    response.status_code = 206  # Partial Content

    return response


@router.get("/download/{file_path:path}")
async def download_video(_: Request, file_path: str):
    """
    download video
    :param _: Request request
    :param file_path: video file path, eg: /cd1727ed-3473-42a2-a7da-4faafafec72b/final-1.mp4
    :return: video file
    """
    tasks_dir = utils.task_dir()
    video_path = os.path.join(tasks_dir, file_path)
    file_path = pathlib.Path(video_path)
    filename = file_path.stem
    extension = file_path.suffix
    headers = {"Content-Disposition": f"attachment; filename={filename}{extension}"}
    return FileResponse(
        path=video_path,
        headers=headers,
        filename=f"{filename}{extension}",
        media_type=f"video/{extension[1:]}",
    )
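For reference, the /stream endpoint above honors HTTP Range headers and always answers 206 Partial Content. A sketch of a client exercising it (the task id and file name are hypothetical):

# Sketch: request the first KiB of a rendered video via the Range support above.
import requests

r = requests.get(
    "http://localhost:8080/api/v1/stream/demo-task/final-1.mp4",
    headers={"Range": "bytes=0-1023"},
)
print(r.status_code)  # 206
print(r.headers["Content-Range"])  # e.g. "bytes 0-1023/<video size>"
print(len(r.content))  # 1024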
app/models/__init__.py ADDED
File without changes
app/models/const.py ADDED
@@ -0,0 +1,25 @@
PUNCTUATIONS = [
    "?",
    ",",
    ".",
    "、",
    ";",
    ":",
    "!",
    "…",
    "?",
    ",",
    "。",
    "、",
    ";",
    ":",
    "!",
    "...",
]

TASK_STATE_FAILED = -1
TASK_STATE_COMPLETE = 1
TASK_STATE_PROCESSING = 4

FILE_TYPE_VIDEOS = ["mp4", "mov", "mkv", "webm"]
FILE_TYPE_IMAGES = ["jpg", "jpeg", "png", "bmp"]
app/models/exception.py ADDED
@@ -0,0 +1,28 @@
import traceback
from typing import Any

from loguru import logger


class HttpException(Exception):
    def __init__(
        self, task_id: str, status_code: int, message: str = "", data: Any = None
    ):
        self.message = message
        self.status_code = status_code
        self.data = data
        # Retrieve the exception stack trace information.
        tb_str = traceback.format_exc().strip()
        if not tb_str or tb_str == "NoneType: None":
            msg = f"HttpException: {status_code}, {task_id}, {message}"
        else:
            msg = f"HttpException: {status_code}, {task_id}, {message}\n{tb_str}"

        if status_code == 400:
            logger.warning(msg)
        else:
            logger.error(msg)


class FileNotFoundException(Exception):
    pass
app/models/schema.py ADDED
@@ -0,0 +1,303 @@
import warnings
from enum import Enum
from typing import Any, List, Optional, Union

import pydantic
from pydantic import BaseModel

# Ignore specific Pydantic warnings
warnings.filterwarnings(
    "ignore",
    category=UserWarning,
    message="Field name.*shadows an attribute in parent.*",
)


class VideoConcatMode(str, Enum):
    random = "random"
    sequential = "sequential"


class VideoTransitionMode(str, Enum):
    none = None
    shuffle = "Shuffle"
    fade_in = "FadeIn"
    fade_out = "FadeOut"
    slide_in = "SlideIn"
    slide_out = "SlideOut"


class VideoAspect(str, Enum):
    landscape = "16:9"
    portrait = "9:16"
    square = "1:1"

    def to_resolution(self):
        if self == VideoAspect.landscape.value:
            return 1920, 1080
        elif self == VideoAspect.portrait.value:
            return 1080, 1920
        elif self == VideoAspect.square.value:
            return 1080, 1080
        return 1080, 1920


class _Config:
    arbitrary_types_allowed = True


@pydantic.dataclasses.dataclass(config=_Config)
class MaterialInfo:
    provider: str = "pexels"
    url: str = ""
    duration: int = 0


class VideoParams(BaseModel):
    """
    {
        "video_subject": "",
        "video_aspect": "横屏 16:9(西瓜视频)",
        "voice_name": "女生-晓晓",
        "bgm_name": "random",
        "font_name": "STHeitiMedium 黑体-中",
        "text_color": "#FFFFFF",
        "font_size": 60,
        "stroke_color": "#000000",
        "stroke_width": 1.5
    }
    """

    video_subject: str
    video_script: str = ""  # Script used to generate the video
    video_terms: Optional[str | list] = None  # Keywords used to generate the video
    video_aspect: Optional[VideoAspect] = VideoAspect.portrait.value
    video_concat_mode: Optional[VideoConcatMode] = VideoConcatMode.random.value
    video_transition_mode: Optional[VideoTransitionMode] = None
    video_clip_duration: Optional[int] = 5
    video_count: Optional[int] = 1

    video_source: Optional[str] = "pexels"
    video_materials: Optional[List[MaterialInfo]] = (
        None  # Materials used to generate the video
    )

    video_language: Optional[str] = ""  # auto detect

    voice_name: Optional[str] = ""
    voice_volume: Optional[float] = 1.0
    voice_rate: Optional[float] = 1.0
    bgm_type: Optional[str] = "random"
    bgm_file: Optional[str] = ""
    bgm_volume: Optional[float] = 0.2

    subtitle_enabled: Optional[bool] = True
    subtitle_position: Optional[str] = "bottom"  # top, bottom, center
    custom_position: float = 70.0
    font_name: Optional[str] = "STHeitiMedium.ttc"
    text_fore_color: Optional[str] = "#FFFFFF"
    text_background_color: Union[bool, str] = True

    font_size: int = 60
    stroke_color: Optional[str] = "#000000"
    stroke_width: float = 1.5
    n_threads: Optional[int] = 2
    paragraph_number: Optional[int] = 1


class SubtitleRequest(BaseModel):
    video_script: str
    video_language: Optional[str] = ""
    voice_name: Optional[str] = "zh-CN-XiaoxiaoNeural-Female"
    voice_volume: Optional[float] = 1.0
    voice_rate: Optional[float] = 1.2
    bgm_type: Optional[str] = "random"
    bgm_file: Optional[str] = ""
    bgm_volume: Optional[float] = 0.2
    subtitle_position: Optional[str] = "bottom"
    font_name: Optional[str] = "STHeitiMedium.ttc"
    text_fore_color: Optional[str] = "#FFFFFF"
    text_background_color: Union[bool, str] = True
    font_size: int = 60
    stroke_color: Optional[str] = "#000000"
    stroke_width: float = 1.5
    video_source: Optional[str] = "local"
    subtitle_enabled: Optional[str] = "true"


class AudioRequest(BaseModel):
    video_script: str
    video_language: Optional[str] = ""
    voice_name: Optional[str] = "zh-CN-XiaoxiaoNeural-Female"
    voice_volume: Optional[float] = 1.0
    voice_rate: Optional[float] = 1.2
    bgm_type: Optional[str] = "random"
    bgm_file: Optional[str] = ""
    bgm_volume: Optional[float] = 0.2
    video_source: Optional[str] = "local"


class VideoScriptParams:
    """
    {
        "video_subject": "春天的花海",
        "video_language": "",
        "paragraph_number": 1
    }
    """

    video_subject: Optional[str] = "春天的花海"
    video_language: Optional[str] = ""
    paragraph_number: Optional[int] = 1


class VideoTermsParams:
    """
    {
        "video_subject": "",
        "video_script": "",
        "amount": 5
    }
    """

    video_subject: Optional[str] = "春天的花海"
    video_script: Optional[str] = (
        "春天的花海,如诗如画般展现在眼前。万物复苏的季节里,大地披上了一袭绚丽多彩的盛装。金黄的迎春、粉嫩的樱花、洁白的梨花、艳丽的郁金香……"
    )
    amount: Optional[int] = 5


class BaseResponse(BaseModel):
    status: int = 200
    message: Optional[str] = "success"
    data: Any = None


class TaskVideoRequest(VideoParams, BaseModel):
    pass


class TaskQueryRequest(BaseModel):
    pass


class VideoScriptRequest(VideoScriptParams, BaseModel):
    pass


class VideoTermsRequest(VideoTermsParams, BaseModel):
    pass


######################################################################################################
######################################################################################################
######################################################################################################
######################################################################################################
class TaskResponse(BaseResponse):
    class TaskResponseData(BaseModel):
        task_id: str

    data: TaskResponseData

    class Config:
        json_schema_extra = {
            "example": {
                "status": 200,
                "message": "success",
                "data": {"task_id": "6c85c8cc-a77a-42b9-bc30-947815aa0558"},
            },
        }


class TaskQueryResponse(BaseResponse):
    class Config:
        json_schema_extra = {
            "example": {
                "status": 200,
                "message": "success",
                "data": {
                    "state": 1,
                    "progress": 100,
                    "videos": [
                        "http://127.0.0.1:8080/tasks/6c85c8cc-a77a-42b9-bc30-947815aa0558/final-1.mp4"
                    ],
                    "combined_videos": [
                        "http://127.0.0.1:8080/tasks/6c85c8cc-a77a-42b9-bc30-947815aa0558/combined-1.mp4"
                    ],
                },
            },
        }


class TaskDeletionResponse(BaseResponse):
    class Config:
        json_schema_extra = {
            "example": {
                "status": 200,
                "message": "success",
                "data": {
                    "state": 1,
                    "progress": 100,
                    "videos": [
                        "http://127.0.0.1:8080/tasks/6c85c8cc-a77a-42b9-bc30-947815aa0558/final-1.mp4"
                    ],
                    "combined_videos": [
                        "http://127.0.0.1:8080/tasks/6c85c8cc-a77a-42b9-bc30-947815aa0558/combined-1.mp4"
                    ],
                },
            },
        }


class VideoScriptResponse(BaseResponse):
    class Config:
        json_schema_extra = {
            "example": {
                "status": 200,
                "message": "success",
                "data": {
                    "video_script": "春天的花海,是大自然的一幅美丽画卷。在这个季节里,大地复苏,万物生长,花朵争相绽放,形成了一片五彩斑斓的花海..."
                },
            },
        }


class VideoTermsResponse(BaseResponse):
    class Config:
        json_schema_extra = {
            "example": {
                "status": 200,
                "message": "success",
                "data": {"video_terms": ["sky", "tree"]},
            },
        }


class BgmRetrieveResponse(BaseResponse):
    class Config:
        json_schema_extra = {
            "example": {
                "status": 200,
                "message": "success",
                "data": {
                    "files": [
                        {
                            "name": "output013.mp3",
                            "size": 1891269,
                            "file": "/MoneyPrinterTurbo/resource/songs/output013.mp3",
                        }
                    ]
                },
            },
        }


class BgmUploadResponse(BaseResponse):
    class Config:
        json_schema_extra = {
            "example": {
                "status": 200,
                "message": "success",
                "data": {"file": "/MoneyPrinterTurbo/resource/songs/example.mp3"},
            },
        }
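Since TaskVideoRequest inherits every field from VideoParams, only `video_subject` is required when calling POST /api/v1/videos. A sketch of building the body in Python:

# Sketch: a minimal request body for POST /api/v1/videos.
from app.models.schema import TaskVideoRequest, VideoAspect

body = TaskVideoRequest(
    video_subject="spring flowers",
    video_aspect=VideoAspect.portrait,
    video_count=1,
)
print(body.model_dump())  # the dict create_task() stores for the task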
app/router.py ADDED
@@ -0,0 +1,17 @@
"""Application configuration - root APIRouter.

Defines all FastAPI application endpoints.

Resources:
    1. https://fastapi.tiangolo.com/tutorial/bigger-applications

"""

from fastapi import APIRouter

from app.controllers.v1 import llm, video

root_api_router = APIRouter()
# v1
root_api_router.include_router(video.router)
root_api_router.include_router(llm.router)
app/services/__init__.py ADDED
File without changes
app/services/llm.py ADDED
@@ -0,0 +1,445 @@
import json
import logging
import os
import re
from typing import List

import g4f
import requests
from loguru import logger
from openai import AzureOpenAI, OpenAI
from openai.types.chat import ChatCompletion

from app.config import config

_max_retries = 5


def _generate_response(prompt: str) -> str:
    try:
        content = ""
        llm_provider = "cloudflare"
        logger.info(f"llm provider: {llm_provider}")
        if llm_provider == "g4f":
            model_name = config.app.get("g4f_model_name", "")
            if not model_name:
                model_name = "gpt-3.5-turbo-16k-0613"
            content = g4f.ChatCompletion.create(
                model=model_name,
                messages=[{"role": "user", "content": prompt}],
            )
        else:
            api_version = ""  # for azure
            if llm_provider == "moonshot":
                api_key = config.app.get("moonshot_api_key")
                model_name = config.app.get("moonshot_model_name")
                base_url = "https://api.moonshot.cn/v1"
            elif llm_provider == "ollama":
                # api_key = config.app.get("openai_api_key")
                api_key = "ollama"  # any string works but you are required to have one
                model_name = config.app.get("ollama_model_name")
                base_url = config.app.get("ollama_base_url", "")
                if not base_url:
                    base_url = "http://localhost:11434/v1"
            elif llm_provider == "openai":
                api_key = config.app.get("openai_api_key")
                model_name = config.app.get("openai_model_name")
                base_url = config.app.get("openai_base_url", "")
                if not base_url:
                    base_url = "https://api.openai.com/v1"
            elif llm_provider == "oneapi":
                api_key = config.app.get("oneapi_api_key")
                model_name = config.app.get("oneapi_model_name")
                base_url = config.app.get("oneapi_base_url", "")
            elif llm_provider == "azure":
                api_key = config.app.get("azure_api_key")
                model_name = config.app.get("azure_model_name")
                base_url = config.app.get("azure_base_url", "")
                api_version = config.app.get("azure_api_version", "2024-02-15-preview")
            elif llm_provider == "gemini":
                api_key = config.app.get("gemini_api_key")
                model_name = config.app.get("gemini_model_name")
                base_url = "***"
            elif llm_provider == "qwen":
                api_key = config.app.get("qwen_api_key")
                model_name = config.app.get("qwen_model_name")
                base_url = "***"
            elif llm_provider == "cloudflare":
                api_key = config.app.get("cloudflare_api_key")
                model_name = config.app.get("cloudflare_model_name")
                account_id = config.app.get("cloudflare_account_id")
                base_url = "***"
            elif llm_provider == "deepseek":
                api_key = config.app.get("deepseek_api_key")
                model_name = config.app.get("deepseek_model_name")
                base_url = config.app.get("deepseek_base_url")
                if not base_url:
                    base_url = "https://api.deepseek.com"
            elif llm_provider == "ernie":
                api_key = config.app.get("ernie_api_key")
                secret_key = config.app.get("ernie_secret_key")
                base_url = config.app.get("ernie_base_url")
                model_name = "***"
                if not secret_key:
                    raise ValueError(
                        f"{llm_provider}: secret_key is not set, please set it in the config.toml file."
                    )
            elif llm_provider == "pollinations":
                try:
                    base_url = config.app.get("pollinations_base_url", "")
                    if not base_url:
                        base_url = "https://text.pollinations.ai/openai"
                    model_name = config.app.get("pollinations_model_name", "openai-fast")

                    # Prepare the payload
                    payload = {
                        "model": model_name,
                        "messages": [{"role": "user", "content": prompt}],
                        "seed": 101,  # Optional but helps with reproducibility
                    }

                    # Optional parameters if configured
                    if config.app.get("pollinations_private"):
                        payload["private"] = True
                    if config.app.get("pollinations_referrer"):
                        payload["referrer"] = config.app.get("pollinations_referrer")

                    headers = {"Content-Type": "application/json"}

                    # Make the API request
                    response = requests.post(base_url, headers=headers, json=payload)
                    response.raise_for_status()
                    result = response.json()

                    if result and "choices" in result and len(result["choices"]) > 0:
                        content = result["choices"][0]["message"]["content"]
                        return content.replace("\n", "")
                    else:
                        raise Exception(
                            f"[{llm_provider}] returned an invalid response format"
                        )

                except requests.exceptions.RequestException as e:
                    raise Exception(f"[{llm_provider}] request failed: {str(e)}")
                except Exception as e:
                    raise Exception(f"[{llm_provider}] error: {str(e)}")

            # Skip validation for providers that don't require an API key
            if llm_provider not in ["pollinations", "ollama"]:
                if not api_key:
                    raise ValueError(
                        f"{llm_provider}: api_key is not set, please set it in the config.toml file."
                    )
                if not model_name:
                    raise ValueError(
                        f"{llm_provider}: model_name is not set, please set it in the config.toml file."
                    )
                if not base_url:
                    raise ValueError(
                        f"{llm_provider}: base_url is not set, please set it in the config.toml file."
                    )

            if llm_provider == "qwen":
                import dashscope
                from dashscope.api_entities.dashscope_response import GenerationResponse

                dashscope.api_key = api_key
                response = dashscope.Generation.call(
                    model=model_name, messages=[{"role": "user", "content": prompt}]
                )
                if response:
                    if isinstance(response, GenerationResponse):
                        status_code = response.status_code
                        if status_code != 200:
                            raise Exception(
                                f'[{llm_provider}] returned an error response: "{response}"'
                            )

                        content = response["output"]["text"]
                        return content.replace("\n", "")
                    else:
                        raise Exception(
                            f'[{llm_provider}] returned an invalid response: "{response}"'
                        )
                else:
                    raise Exception(f"[{llm_provider}] returned an empty response")

            if llm_provider == "gemini":
                import google.generativeai as genai

                genai.configure(api_key=api_key, transport="rest")

                generation_config = {
                    "temperature": 0.5,
                    "top_p": 1,
                    "top_k": 1,
                    "max_output_tokens": 2048,
                }

                safety_settings = [
                    {
                        "category": "HARM_CATEGORY_HARASSMENT",
                        "threshold": "BLOCK_ONLY_HIGH",
                    },
                    {
                        "category": "HARM_CATEGORY_HATE_SPEECH",
                        "threshold": "BLOCK_ONLY_HIGH",
                    },
                    {
                        "category": "HARM_CATEGORY_SEXUALLY_EXPLICIT",
                        "threshold": "BLOCK_ONLY_HIGH",
                    },
                    {
                        "category": "HARM_CATEGORY_DANGEROUS_CONTENT",
                        "threshold": "BLOCK_ONLY_HIGH",
                    },
                ]

                model = genai.GenerativeModel(
                    model_name=model_name,
                    generation_config=generation_config,
                    safety_settings=safety_settings,
                )

                try:
                    response = model.generate_content(prompt)
                    candidates = response.candidates
                    generated_text = candidates[0].content.parts[0].text
                except (AttributeError, IndexError) as e:
                    print("Gemini Error:", e)

                return generated_text

            if llm_provider == "cloudflare":
                response = requests.post(
                    "https://api.cloudflare.com/client/v4/accounts/11b1d730ea208e2e78adc22e1e2e8059/ai/run/@cf/meta/llama-4-scout-17b-16e-instruct",
                    headers={"Authorization": f'Bearer {os.getenv("cloudflare")}'},
                    json={
                        "messages": [
                            {
                                "role": "system",
                                "content": "You are a friendly assistant",
                            },
                            {"role": "user", "content": prompt},
                        ]
                    },
                )
                result = response.json()
                logger.info(result)
                return result["result"]["response"]

            if llm_provider == "ernie":
                response = requests.post(
                    "https://aip.baidubce.com/oauth/2.0/token",
                    params={
                        "grant_type": "client_credentials",
                        "client_id": api_key,
                        "client_secret": secret_key,
                    },
                )
                access_token = response.json().get("access_token")
                url = f"{base_url}?access_token={access_token}"

                payload = json.dumps(
                    {
                        "messages": [{"role": "user", "content": prompt}],
                        "temperature": 0.5,
                        "top_p": 0.8,
                        "penalty_score": 1,
                        "disable_search": False,
                        "enable_citation": False,
                        "response_format": "text",
                    }
                )
                headers = {"Content-Type": "application/json"}

                response = requests.request(
                    "POST", url, headers=headers, data=payload
                ).json()
                return response.get("result")

            if llm_provider == "azure":
                client = AzureOpenAI(
                    api_key=api_key,
                    api_version=api_version,
                    azure_endpoint=base_url,
                )
            else:
                client = OpenAI(
                    api_key=api_key,
                    base_url=base_url,
                )

            response = client.chat.completions.create(
                model=model_name, messages=[{"role": "user", "content": prompt}]
            )
            if response:
                if isinstance(response, ChatCompletion):
                    content = response.choices[0].message.content
                else:
                    raise Exception(
                        f'[{llm_provider}] returned an invalid response: "{response}", please check your network '
                        f"connection and try again."
                    )
            else:
                raise Exception(
                    f"[{llm_provider}] returned an empty response, please check your network connection and try again."
                )

        return content.replace("\n", "")
    except Exception as e:
        return f"Error: {str(e)}"


def generate_script(
    video_subject: str, language: str = "", paragraph_number: int = 1
) -> str:
    prompt = f"""
# Role: Video Script Generator

## Goals:
Generate a script for a video, depending on the subject of the video.

## Constraints:
1. the script is to be returned as a string with the specified number of paragraphs.
2. do not under any circumstance reference this prompt in your response.
3. get straight to the point, don't start with unnecessary things like, "welcome to this video".
4. you must not include any type of markdown or formatting in the script, never use a title.
5. only return the raw content of the script.
6. do not include "voiceover", "narrator" or similar indicators of what should be spoken at the beginning of each paragraph or line.
7. you must not mention the prompt, or anything about the script itself. also, never talk about the amount of paragraphs or lines. just write the script.
8. respond in the same language as the video subject.

# Initialization:
- video subject: {video_subject}
- number of paragraphs: {paragraph_number}
""".strip()
    if language:
        prompt += f"\n- language: {language}"

    final_script = ""
    logger.info(f"subject: {video_subject}")

    def format_response(response):
        # Clean the script
        # Remove asterisks, hashes
        response = response.replace("*", "")
        response = response.replace("#", "")

        # Remove markdown syntax
        response = re.sub(r"\[.*\]", "", response)
        response = re.sub(r"\(.*\)", "", response)

        # Split the script into paragraphs
        paragraphs = response.split("\n\n")

        # Select the specified number of paragraphs
        # selected_paragraphs = paragraphs[:paragraph_number]

        # Join the selected paragraphs into a single string
        return "\n\n".join(paragraphs)

    for i in range(_max_retries):
        try:
            response = _generate_response(prompt=prompt)
            if response:
                final_script = format_response(response)
            else:
                logging.error("gpt returned an empty response")

            # g4f may return an error message
            if final_script and "当日额度已消耗完" in final_script:
                raise ValueError(final_script)

            if final_script:
                break
        except Exception as e:
            logger.error(f"failed to generate script: {e}")

        if i < _max_retries:
            logger.warning(f"failed to generate video script, trying again... {i + 1}")

    if "Error: " in final_script:
        logger.error(f"failed to generate video script: {final_script}")
    else:
        logger.success(f"completed: \n{final_script}")
    return final_script.strip()


def generate_terms(video_subject: str, video_script: str, amount: int = 5) -> List[str]:
    prompt = f"""
# Role: Video Search Terms Generator

## Goals:
Generate {amount} search terms for stock videos, depending on the subject of a video.

## Constraints:
1. the search terms are to be returned as a json-array of strings.
2. each search term should consist of 1-3 words, always add the main subject of the video.
3. you must only return the json-array of strings. you must not return anything else. you must not return the script.
4. the search terms must be related to the subject of the video.
5. reply with english search terms only.

## Output Example:
["search term 1", "search term 2", "search term 3","search term 4","search term 5"]

## Context:
### Video Subject
{video_subject}

### Video Script
{video_script}

Please note that you must use English for generating video search terms; Chinese is not accepted.
""".strip()

    logger.info(f"subject: {video_subject}")

    search_terms = []
    response = ""
    for i in range(_max_retries):
        try:
            response = _generate_response(prompt)
            if "Error: " in response:
                logger.error(f"failed to generate video script: {response}")
                return response
            search_terms = json.loads(response)
            if not isinstance(search_terms, list) or not all(
                isinstance(term, str) for term in search_terms
            ):
                logger.error("response is not a list of strings.")
                continue

        except Exception as e:
            logger.warning(f"failed to generate video terms: {str(e)}")
            if response:
                match = re.search(r"\[.*]", response)
                if match:
                    try:
                        search_terms = json.loads(match.group())
                    except Exception as e:
                        logger.warning(f"failed to generate video terms: {str(e)}")

        if search_terms and len(search_terms) > 0:
            break
        if i < _max_retries:
            logger.warning(f"failed to generate video terms, trying again... {i + 1}")

    logger.success(f"completed: \n{search_terms}")
    return search_terms


if __name__ == "__main__":
    video_subject = "生命的意义是什么"
    script = generate_script(
        video_subject=video_subject, language="zh-CN", paragraph_number=1
    )
    print("######################")
    print(script)
    search_terms = generate_terms(
        video_subject=video_subject, video_script=script, amount=5
    )
    print("######################")
    print(search_terms)
app/services/material.py ADDED
@@ -0,0 +1,268 @@
import os
import random
from typing import List
from urllib.parse import urlencode

import requests
from loguru import logger
from moviepy.video.io.VideoFileClip import VideoFileClip

from app.config import config
from app.models.schema import MaterialInfo, VideoAspect, VideoConcatMode
from app.utils import utils

requested_count = 0


def get_api_key(cfg_key: str):
    api_keys = config.app.get(cfg_key)
    if not api_keys:
        raise ValueError(
            f"\n\n##### {cfg_key} is not set #####\n\nPlease set it in the config.toml file: {config.config_file}\n\n"
            f"{utils.to_json(config.app)}"
        )

    # if only one key is provided, return it
    if isinstance(api_keys, str):
        return api_keys

    global requested_count
    requested_count += 1
    return api_keys[requested_count % len(api_keys)]


def search_videos_pexels(
    search_term: str,
    minimum_duration: int,
    video_aspect: VideoAspect = VideoAspect.portrait,
) -> List[MaterialInfo]:
    aspect = VideoAspect(video_aspect)
    video_orientation = aspect.name
    video_width, video_height = aspect.to_resolution()
    api_key = os.getenv("pexels")
    headers = {
        "Authorization": api_key,
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36",
    }
    # Build URL
    params = {"query": search_term, "per_page": 20, "orientation": video_orientation}
    query_url = f"https://api.pexels.com/videos/search?{urlencode(params)}"
    logger.info(f"searching videos: {query_url}, with proxies: {config.proxy}")

    try:
        r = requests.get(
            query_url,
            headers=headers,
            proxies=config.proxy,
            verify=False,
            timeout=(30, 60),
        )
        response = r.json()
        video_items = []
        if "videos" not in response:
            logger.error(f"search videos failed: {response}")
            return video_items
        videos = response["videos"]
        # loop through each video in the result
        for v in videos:
            duration = v["duration"]
            # check if video has desired minimum duration
            if duration < minimum_duration:
                continue
            video_files = v["video_files"]
            # loop through each url to determine the best quality
            for video in video_files:
                w = int(video["width"])
                h = int(video["height"])
                if w == video_width and h == video_height:
                    item = MaterialInfo()
                    item.provider = "pexels"
                    item.url = video["link"]
                    item.duration = duration
                    video_items.append(item)
                    break
        return video_items
    except Exception as e:
        logger.error(f"search videos failed: {str(e)}")

    return []


def search_videos_pixabay(
    search_term: str,
    minimum_duration: int,
    video_aspect: VideoAspect = VideoAspect.portrait,
) -> List[MaterialInfo]:
    aspect = VideoAspect(video_aspect)

    video_width, video_height = aspect.to_resolution()

    api_key = get_api_key("pixabay_api_keys")
    # Build URL
    params = {
        "q": search_term,
        "video_type": "all",  # Accepted values: "all", "film", "animation"
        "per_page": 50,
        "key": api_key,
    }
    query_url = f"https://pixabay.com/api/videos/?{urlencode(params)}"
    logger.info(f"searching videos: {query_url}, with proxies: {config.proxy}")

    try:
        r = requests.get(
            query_url, proxies=config.proxy, verify=False, timeout=(30, 60)
        )
        response = r.json()
        video_items = []
        if "hits" not in response:
            logger.error(f"search videos failed: {response}")
            return video_items
        videos = response["hits"]
        # loop through each video in the result
        for v in videos:
            duration = v["duration"]
            # check if video has desired minimum duration
            if duration < minimum_duration:
                continue
            video_files = v["videos"]
            # loop through each url to determine the best quality
            for video_type in video_files:
                video = video_files[video_type]
                w = int(video["width"])
                # h = int(video["height"])
                if w >= video_width:
                    item = MaterialInfo()
                    item.provider = "pixabay"
                    item.url = video["url"]
                    item.duration = duration
                    video_items.append(item)
                    break
        return video_items
    except Exception as e:
        logger.error(f"search videos failed: {str(e)}")

    return []


def save_video(video_url: str, save_dir: str = "") -> str:
    if not save_dir:
        save_dir = utils.storage_dir("cache_videos")

    if not os.path.exists(save_dir):
        os.makedirs(save_dir)

    url_without_query = video_url.split("?")[0]
    url_hash = utils.md5(url_without_query)
    video_id = f"vid-{url_hash}"
    video_path = f"{save_dir}/{video_id}.mp4"

    # if video already exists, return the path
    if os.path.exists(video_path) and os.path.getsize(video_path) > 0:
        logger.info(f"video already exists: {video_path}")
        return video_path

    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36"
    }

    # if video does not exist, download it
    with open(video_path, "wb") as f:
        f.write(
            requests.get(
                video_url,
                headers=headers,
                proxies=config.proxy,
                verify=False,
                timeout=(60, 240),
            ).content
        )

    if os.path.exists(video_path) and os.path.getsize(video_path) > 0:
        try:
            clip = VideoFileClip(video_path)
            duration = clip.duration
            fps = clip.fps
            clip.close()
            if duration > 0 and fps > 0:
                return video_path
        except Exception as e:
            try:
                os.remove(video_path)
            except Exception:
                pass
            logger.warning(f"invalid video file: {video_path} => {str(e)}")
    return ""


def download_videos(
    task_id: str,
    search_terms: List[str],
    source: str = "pexels",
    video_aspect: VideoAspect = VideoAspect.portrait,
    video_contact_mode: VideoConcatMode = VideoConcatMode.random,
    audio_duration: float = 0.0,
    max_clip_duration: int = 5,
) -> List[str]:
    valid_video_items = []
    valid_video_urls = []
    found_duration = 0.0
    search_videos = search_videos_pexels
    if source == "pixabay":
        search_videos = search_videos_pixabay

    for search_term in search_terms:
        video_items = search_videos(
            search_term=search_term,
            minimum_duration=max_clip_duration,
            video_aspect=video_aspect,
        )
        logger.info(f"found {len(video_items)} videos for '{search_term}'")

        for item in video_items:
            if item.url not in valid_video_urls:
                valid_video_items.append(item)
                valid_video_urls.append(item.url)
                found_duration += item.duration

    logger.info(
        f"found total videos: {len(valid_video_items)}, required duration: {audio_duration} seconds, found duration: {found_duration} seconds"
    )
    video_paths = []

    material_directory = config.app.get("material_directory", "").strip()
    if material_directory == "task":
        material_directory = utils.task_dir(task_id)
    elif material_directory and not os.path.isdir(material_directory):
        material_directory = ""

    if video_contact_mode.value == VideoConcatMode.random.value:
        random.shuffle(valid_video_items)

    total_duration = 0.0
    for item in valid_video_items:
        try:
            logger.info(f"downloading video: {item.url}")
            saved_video_path = save_video(
                video_url=item.url, save_dir=material_directory
            )
            if saved_video_path:
                logger.info(f"video saved: {saved_video_path}")
                video_paths.append(saved_video_path)
                seconds = min(max_clip_duration, item.duration)
                total_duration += seconds
                if total_duration > audio_duration:
                    logger.info(
                        f"total duration of downloaded videos: {total_duration} seconds, skip downloading more"
                    )
                    break
        except Exception as e:
            logger.error(f"failed to download video: {utils.to_json(item)} => {str(e)}")
    logger.success(f"downloaded {len(video_paths)} videos")
    return video_paths
263
+
264
+
265
+ if __name__ == "__main__":
266
+ download_videos(
267
+ "test123", ["Money Exchange Medium"], audio_duration=100, source="pixabay"
268
+ )
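A minimal usage sketch for the downloader above, assuming a Pexels API key is configured; the task id and search term are made-up examples.

from app.models.schema import VideoAspect
from app.services import material

paths = material.download_videos(
    task_id="demo-task",          # made-up id; only used to resolve the save directory
    search_terms=["city night"],  # made-up query
    source="pexels",
    video_aspect=VideoAspect.portrait,
    audio_duration=30.0,          # download until ~30 seconds of footage is collected
    max_clip_duration=5,
)
print(paths)  # local .mp4 paths under the cache or task directory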
app/services/state.py ADDED
@@ -0,0 +1,158 @@
+ import ast
+ from abc import ABC, abstractmethod
+
+ from app.config import config
+ from app.models import const
+
+
+ # Base class for state management
+ class BaseState(ABC):
+     @abstractmethod
+     def update_task(self, task_id: str, state: int, progress: int = 0, **kwargs):
+         pass
+
+     @abstractmethod
+     def get_task(self, task_id: str):
+         pass
+
+     @abstractmethod
+     def get_all_tasks(self, page: int, page_size: int):
+         pass
+
+
+ # In-memory state management
+ class MemoryState(BaseState):
+     def __init__(self):
+         self._tasks = {}
+
+     def get_all_tasks(self, page: int, page_size: int):
+         start = (page - 1) * page_size
+         end = start + page_size
+         tasks = list(self._tasks.values())
+         total = len(tasks)
+         return tasks[start:end], total
+
+     def update_task(
+         self,
+         task_id: str,
+         state: int = const.TASK_STATE_PROCESSING,
+         progress: int = 0,
+         **kwargs,
+     ):
+         progress = int(progress)
+         if progress > 100:
+             progress = 100
+
+         self._tasks[task_id] = {
+             "task_id": task_id,
+             "state": state,
+             "progress": progress,
+             **kwargs,
+         }
+
+     def get_task(self, task_id: str):
+         return self._tasks.get(task_id, None)
+
+     def delete_task(self, task_id: str):
+         if task_id in self._tasks:
+             del self._tasks[task_id]
+
+
+ # Redis state management
+ class RedisState(BaseState):
+     def __init__(self, host="localhost", port=6379, db=0, password=None):
+         import redis
+
+         self._redis = redis.StrictRedis(host=host, port=port, db=db, password=password)
+
+     def get_all_tasks(self, page: int, page_size: int):
+         # SCAN the keyspace and keep only the keys that fall inside the
+         # requested page; `index` counts every key seen, so it also serves
+         # as the total once the iteration finishes.
+         start = (page - 1) * page_size
+         end = start + page_size
+         tasks = []
+         index = 0
+         cursor = 0
+         while True:
+             cursor, keys = self._redis.scan(cursor, count=page_size)
+             for key in keys:
+                 if start <= index < end:
+                     task_data = self._redis.hgetall(key)
+                     task = {
+                         k.decode("utf-8"): self._convert_to_original_type(v)
+                         for k, v in task_data.items()
+                     }
+                     tasks.append(task)
+                 index += 1
+             if cursor == 0:
+                 break
+         return tasks, index
+
+     def update_task(
+         self,
+         task_id: str,
+         state: int = const.TASK_STATE_PROCESSING,
+         progress: int = 0,
+         **kwargs,
+     ):
+         progress = int(progress)
+         if progress > 100:
+             progress = 100
+
+         fields = {
+             "task_id": task_id,
+             "state": state,
+             "progress": progress,
+             **kwargs,
+         }
+
+         for field, value in fields.items():
+             self._redis.hset(task_id, field, str(value))
+
+     def get_task(self, task_id: str):
+         task_data = self._redis.hgetall(task_id)
+         if not task_data:
+             return None
+
+         task = {
+             key.decode("utf-8"): self._convert_to_original_type(value)
+             for key, value in task_data.items()
+         }
+         return task
+
+     def delete_task(self, task_id: str):
+         self._redis.delete(task_id)
+
+     @staticmethod
+     def _convert_to_original_type(value):
+         """
+         Convert the value from a byte string back to its original data type.
+         Extend this method to handle other data types as needed.
+         """
+         value_str = value.decode("utf-8")
+
+         try:
+             # try to parse the string as a Python literal (e.g. a list)
+             return ast.literal_eval(value_str)
+         except (ValueError, SyntaxError):
+             pass
+
+         if value_str.isdigit():
+             return int(value_str)
+         # Add more conversions here if needed
+         return value_str
+
+
+ # Global state
+ _enable_redis = config.app.get("enable_redis", False)
+ _redis_host = config.app.get("redis_host", "localhost")
+ _redis_port = config.app.get("redis_port", 6379)
+ _redis_db = config.app.get("redis_db", 0)
+ _redis_password = config.app.get("redis_password", None)
+
+ state = (
+     RedisState(
+         host=_redis_host, port=_redis_port, db=_redis_db, password=_redis_password
+     )
+     if _enable_redis
+     else MemoryState()
+ )
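A quick sketch of the task-state API defined above, assuming enable_redis is false so the in-memory backend is selected; the task id is a made-up example.

from app.models import const
from app.services import state as sm

sm.state.update_task("demo-task", state=const.TASK_STATE_PROCESSING, progress=42, videos=["a.mp4"])
print(sm.state.get_task("demo-task")["progress"])  # 42
# With RedisState, every value round-trips through str() on write and
# ast.literal_eval() on read, so lists and ints come back typed and
# everything else comes back as a plain string.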
app/services/subtitle.py ADDED
@@ -0,0 +1,299 @@
+ import json
+ import os.path
+ import re
+ from timeit import default_timer as timer
+
+ from faster_whisper import WhisperModel
+ from loguru import logger
+
+ from app.config import config
+ from app.utils import utils
+
+ model_size = config.whisper.get("model_size", "large-v3")
+ device = config.whisper.get("device", "cpu")
+ compute_type = config.whisper.get("compute_type", "int8")
+ model = None
+
+
+ def create(audio_file, subtitle_file: str = ""):
+     global model
+     if not model:
+         model_path = f"{utils.root_dir()}/models/whisper-{model_size}"
+         model_bin_file = f"{model_path}/model.bin"
+         if not os.path.isdir(model_path) or not os.path.isfile(model_bin_file):
+             model_path = model_size
+
+         logger.info(
+             f"loading model: {model_path}, device: {device}, compute_type: {compute_type}"
+         )
+         try:
+             model = WhisperModel(
+                 model_size_or_path=model_path, device=device, compute_type=compute_type
+             )
+         except Exception as e:
+             logger.error(
+                 f"failed to load model: {e} \n\n"
+                 f"********************************************\n"
+                 f"this may be caused by a network issue. \n"
+                 f"please download the model manually and put it in the 'models' folder. \n"
+                 f"see [README.md FAQ](https://github.com/harry0703/MoneyPrinterTurbo) for more details.\n"
+                 f"********************************************\n\n"
+             )
+             return None
+
+     logger.info(f"start, output file: {subtitle_file}")
+     if not subtitle_file:
+         subtitle_file = f"{audio_file}.srt"
+
+     segments, info = model.transcribe(
+         audio_file,
+         beam_size=5,
+         word_timestamps=True,
+         vad_filter=True,
+         vad_parameters=dict(min_silence_duration_ms=500),
+     )
+
+     logger.info(
+         f"detected language: '{info.language}', probability: {info.language_probability:.2f}"
+     )
+
+     start = timer()
+     subtitles = []
+
+     def recognized(seg_text, seg_start, seg_end):
+         seg_text = seg_text.strip()
+         if not seg_text:
+             return
+
+         msg = "[%.2fs -> %.2fs] %s" % (seg_start, seg_end, seg_text)
+         logger.debug(msg)
+
+         subtitles.append(
+             {"msg": seg_text, "start_time": seg_start, "end_time": seg_end}
+         )
+
+     for segment in segments:
+         words_idx = 0
+         words_len = len(segment.words)
+
+         seg_start = 0
+         seg_end = 0
+         seg_text = ""
+
+         if segment.words:
+             is_segmented = False
+             for word in segment.words:
+                 if not is_segmented:
+                     seg_start = word.start
+                     is_segmented = True
+
+                 seg_end = word.end
+                 seg_text += word.word
+
+                 # if the word contains punctuation, break the sentence there
+                 if utils.str_contains_punctuation(word.word):
+                     # remove the trailing punctuation character
+                     seg_text = seg_text[:-1]
+                     if not seg_text:
+                         continue
+
+                     recognized(seg_text, seg_start, seg_end)
+
+                     is_segmented = False
+                     seg_text = ""
+
+                 if words_idx == 0 and segment.start < word.start:
+                     seg_start = word.start
+                 if words_idx == (words_len - 1) and segment.end > word.end:
+                     seg_end = word.end
+                 words_idx += 1
+
+         if not seg_text:
+             continue
+
+         recognized(seg_text, seg_start, seg_end)
+
+     end = timer()
+
+     diff = end - start
+     logger.info(f"complete, elapsed: {diff:.2f} s")
+
+     idx = 1
+     lines = []
+     for subtitle in subtitles:
+         text = subtitle.get("msg")
+         if text:
+             lines.append(
+                 utils.text_to_srt(
+                     idx, text, subtitle.get("start_time"), subtitle.get("end_time")
+                 )
+             )
+             idx += 1
+
+     sub = "\n".join(lines) + "\n"
+     with open(subtitle_file, "w", encoding="utf-8") as f:
+         f.write(sub)
+     logger.info(f"subtitle file created: {subtitle_file}")
+
+
+ def file_to_subtitles(filename):
+     if not filename or not os.path.isfile(filename):
+         return []
+
+     times_texts = []
+     current_times = None
+     current_text = ""
+     index = 0
+     with open(filename, "r", encoding="utf-8") as f:
+         for line in f:
+             times = re.findall("([0-9]*:[0-9]*:[0-9]*,[0-9]*)", line)
+             if times:
+                 current_times = line
+             elif line.strip() == "" and current_times:
+                 index += 1
+                 times_texts.append((index, current_times.strip(), current_text.strip()))
+                 current_times, current_text = None, ""
+             elif current_times:
+                 current_text += line
+     return times_texts
+
+
+ def levenshtein_distance(s1, s2):
+     if len(s1) < len(s2):
+         return levenshtein_distance(s2, s1)
+
+     if len(s2) == 0:
+         return len(s1)
+
+     previous_row = range(len(s2) + 1)
+     for i, c1 in enumerate(s1):
+         current_row = [i + 1]
+         for j, c2 in enumerate(s2):
+             insertions = previous_row[j + 1] + 1
+             deletions = current_row[j] + 1
+             substitutions = previous_row[j] + (c1 != c2)
+             current_row.append(min(insertions, deletions, substitutions))
+         previous_row = current_row
+
+     return previous_row[-1]
+
+
+ def similarity(a, b):
+     distance = levenshtein_distance(a.lower(), b.lower())
+     max_length = max(len(a), len(b))
+     return 1 - (distance / max_length)
+
+
+ def correct(subtitle_file, video_script):
+     subtitle_items = file_to_subtitles(subtitle_file)
+     script_lines = utils.split_string_by_punctuations(video_script)
+
+     corrected = False
+     new_subtitle_items = []
+     script_index = 0
+     subtitle_index = 0
+
+     while script_index < len(script_lines) and subtitle_index < len(subtitle_items):
+         script_line = script_lines[script_index].strip()
+         subtitle_line = subtitle_items[subtitle_index][2].strip()
+
+         if script_line == subtitle_line:
+             new_subtitle_items.append(subtitle_items[subtitle_index])
+             script_index += 1
+             subtitle_index += 1
+         else:
+             combined_subtitle = subtitle_line
+             start_time = subtitle_items[subtitle_index][1].split(" --> ")[0]
+             end_time = subtitle_items[subtitle_index][1].split(" --> ")[1]
+             next_subtitle_index = subtitle_index + 1
+
+             while next_subtitle_index < len(subtitle_items):
+                 next_subtitle = subtitle_items[next_subtitle_index][2].strip()
+                 if similarity(
+                     script_line, combined_subtitle + " " + next_subtitle
+                 ) > similarity(script_line, combined_subtitle):
+                     combined_subtitle += " " + next_subtitle
+                     end_time = subtitle_items[next_subtitle_index][1].split(" --> ")[1]
+                     next_subtitle_index += 1
+                 else:
+                     break
+
+             if similarity(script_line, combined_subtitle) > 0.8:
+                 logger.warning(
+                     f"Merged/Corrected - Script: {script_line}, Subtitle: {combined_subtitle}"
+                 )
+                 new_subtitle_items.append(
+                     (
+                         len(new_subtitle_items) + 1,
+                         f"{start_time} --> {end_time}",
+                         script_line,
+                     )
+                 )
+             else:
+                 logger.warning(
+                     f"Mismatch - Script: {script_line}, Subtitle: {combined_subtitle}"
+                 )
+                 new_subtitle_items.append(
+                     (
+                         len(new_subtitle_items) + 1,
+                         f"{start_time} --> {end_time}",
+                         script_line,
+                     )
+                 )
+             corrected = True
+
+             script_index += 1
+             subtitle_index = next_subtitle_index
+
+     # Process the remaining lines of the script.
+     while script_index < len(script_lines):
+         logger.warning(f"Extra script line: {script_lines[script_index]}")
+         if subtitle_index < len(subtitle_items):
+             new_subtitle_items.append(
+                 (
+                     len(new_subtitle_items) + 1,
+                     subtitle_items[subtitle_index][1],
+                     script_lines[script_index],
+                 )
+             )
+             subtitle_index += 1
+         else:
+             new_subtitle_items.append(
+                 (
+                     len(new_subtitle_items) + 1,
+                     "00:00:00,000 --> 00:00:00,000",
+                     script_lines[script_index],
+                 )
+             )
+         script_index += 1
+         corrected = True
+
+     if corrected:
+         with open(subtitle_file, "w", encoding="utf-8") as fd:
+             for i, item in enumerate(new_subtitle_items):
+                 fd.write(f"{i + 1}\n{item[1]}\n{item[2]}\n\n")
+         logger.info("Subtitle corrected")
+     else:
+         logger.success("Subtitle is correct")
+
+
+ if __name__ == "__main__":
+     task_id = "c12fd1e6-4b0a-4d65-a075-c87abe35a072"
+     task_dir = utils.task_dir(task_id)
+     subtitle_file = f"{task_dir}/subtitle.srt"
+     audio_file = f"{task_dir}/audio.mp3"
+
+     subtitles = file_to_subtitles(subtitle_file)
+     print(subtitles)
+
+     script_file = f"{task_dir}/script.json"
+     with open(script_file, "r") as f:
+         script_content = f.read()
+     s = json.loads(script_content)
+     script = s.get("script")
+
+     correct(subtitle_file, script)
+
+     subtitle_file = f"{task_dir}/subtitle-test.srt"
+     create(audio_file, subtitle_file)
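For reference, the similarity() helper above is a normalized Levenshtein distance: 1.0 for identical strings (case-insensitive), lower as more edits are needed. A tiny worked example:

from app.services.subtitle import similarity

# lowercased edit distance is 1 (the trailing "!"); the longer string has 14 chars
print(similarity("Money matters", "money matters!"))  # 1 - 1/14 ≈ 0.93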
app/services/task.py ADDED
@@ -0,0 +1,339 @@
+ import math
+ import os.path
+ import re
+ from os import path
+
+ from loguru import logger
+
+ from app.config import config
+ from app.models import const
+ from app.models.schema import VideoConcatMode, VideoParams
+ from app.services import llm, material, subtitle, video, voice
+ from app.services import state as sm
+ from app.utils import utils
+
+
+ def generate_script(task_id, params):
+     logger.info("\n\n## generating video script")
+     video_script = params.video_script.strip()
+     if not video_script:
+         video_script = llm.generate_script(
+             video_subject=params.video_subject,
+             language=params.video_language,
+             paragraph_number=params.paragraph_number,
+         )
+     else:
+         logger.debug(f"video script: \n{video_script}")
+
+     if not video_script:
+         sm.state.update_task(task_id, state=const.TASK_STATE_FAILED)
+         logger.error("failed to generate video script.")
+         return None
+
+     return video_script
+
+
+ def generate_terms(task_id, params, video_script):
+     logger.info("\n\n## generating video terms")
+     video_terms = params.video_terms
+     if not video_terms:
+         video_terms = llm.generate_terms(
+             video_subject=params.video_subject, video_script=video_script, amount=5
+         )
+     else:
+         if isinstance(video_terms, str):
+             video_terms = [term.strip() for term in re.split(r"[,,]", video_terms)]
+         elif isinstance(video_terms, list):
+             video_terms = [term.strip() for term in video_terms]
+         else:
+             raise ValueError("video_terms must be a string or a list of strings.")
+
+     logger.debug(f"video terms: {utils.to_json(video_terms)}")
+
+     if not video_terms:
+         sm.state.update_task(task_id, state=const.TASK_STATE_FAILED)
+         logger.error("failed to generate video terms.")
+         return None
+
+     return video_terms
+
+
+ def save_script_data(task_id, video_script, video_terms, params):
+     script_file = path.join(utils.task_dir(task_id), "script.json")
+     script_data = {
+         "script": video_script,
+         "search_terms": video_terms,
+         "params": params,
+     }
+
+     with open(script_file, "w", encoding="utf-8") as f:
+         f.write(utils.to_json(script_data))
+
+
+ def generate_audio(task_id, params, video_script):
+     logger.info("\n\n## generating audio")
+     audio_file = path.join(utils.task_dir(task_id), "audio.mp3")
+     sub_maker = voice.tts(
+         text=video_script,
+         voice_name=voice.parse_voice_name(params.voice_name),
+         voice_rate=params.voice_rate,
+         voice_file=audio_file,
+     )
+     if sub_maker is None:
+         sm.state.update_task(task_id, state=const.TASK_STATE_FAILED)
+         logger.error(
+             """failed to generate audio:
+ 1. check if the language of the voice matches the language of the video script.
+ 2. check if the network is available. If you are in China, it is recommended to use a VPN and enable the global traffic mode.
+         """.strip()
+         )
+         return None, None, None
+
+     audio_duration = math.ceil(voice.get_audio_duration(sub_maker))
+     return audio_file, audio_duration, sub_maker
+
+
+ def generate_subtitle(task_id, params, video_script, sub_maker, audio_file):
+     if not params.subtitle_enabled:
+         return ""
+
+     subtitle_path = path.join(utils.task_dir(task_id), "subtitle.srt")
+     subtitle_provider = config.app.get("subtitle_provider", "edge").strip().lower()
+     logger.info(f"\n\n## generating subtitle, provider: {subtitle_provider}")
+
+     subtitle_fallback = False
+     if subtitle_provider == "edge":
+         voice.create_subtitle(
+             text=video_script, sub_maker=sub_maker, subtitle_file=subtitle_path
+         )
+         if not os.path.exists(subtitle_path):
+             subtitle_fallback = True
+             logger.warning("subtitle file not found, falling back to whisper")
+
+     if subtitle_provider == "whisper" or subtitle_fallback:
+         subtitle.create(audio_file=audio_file, subtitle_file=subtitle_path)
+         logger.info("\n\n## correcting subtitle")
+         subtitle.correct(subtitle_file=subtitle_path, video_script=video_script)
+
+     subtitle_lines = subtitle.file_to_subtitles(subtitle_path)
+     if not subtitle_lines:
+         logger.warning(f"subtitle file is invalid: {subtitle_path}")
+         return ""
+
+     return subtitle_path
+
+
+ def get_video_materials(task_id, params, video_terms, audio_duration):
+     if params.video_source == "local":
+         logger.info("\n\n## preprocessing local materials")
+         materials = video.preprocess_video(
+             materials=params.video_materials, clip_duration=params.video_clip_duration
+         )
+         if not materials:
+             sm.state.update_task(task_id, state=const.TASK_STATE_FAILED)
+             logger.error(
+                 "no valid materials found, please check the materials and try again."
+             )
+             return None
+         return [material_info.url for material_info in materials]
+     else:
+         logger.info(f"\n\n## downloading videos from {params.video_source}")
+         downloaded_videos = material.download_videos(
+             task_id=task_id,
+             search_terms=video_terms,
+             source=params.video_source,
+             video_aspect=params.video_aspect,
+             video_concat_mode=params.video_concat_mode,
+             audio_duration=audio_duration * params.video_count,
+             max_clip_duration=params.video_clip_duration,
+         )
+         if not downloaded_videos:
+             sm.state.update_task(task_id, state=const.TASK_STATE_FAILED)
+             logger.error(
+                 "failed to download videos, maybe the network is not available. if you are in China, please use a VPN."
+             )
+             return None
+         return downloaded_videos
+
+
+ def generate_final_videos(
+     task_id, params, downloaded_videos, audio_file, subtitle_path
+ ):
+     final_video_paths = []
+     combined_video_paths = []
+     video_concat_mode = (
+         params.video_concat_mode if params.video_count == 1 else VideoConcatMode.random
+     )
+     video_transition_mode = params.video_transition_mode
+
+     _progress = 50
+     for i in range(params.video_count):
+         index = i + 1
+         combined_video_path = path.join(
+             utils.task_dir(task_id), f"combined-{index}.mp4"
+         )
+         logger.info(f"\n\n## combining video: {index} => {combined_video_path}")
+         video.combine_videos(
+             combined_video_path=combined_video_path,
+             video_paths=downloaded_videos,
+             audio_file=audio_file,
+             video_aspect=params.video_aspect,
+             video_concat_mode=video_concat_mode,
+             video_transition_mode=video_transition_mode,
+             max_clip_duration=params.video_clip_duration,
+             threads=params.n_threads,
+         )
+
+         _progress += 50 / params.video_count / 2
+         sm.state.update_task(task_id, progress=_progress)
+
+         final_video_path = path.join(utils.task_dir(task_id), f"final-{index}.mp4")
+
+         logger.info(f"\n\n## generating video: {index} => {final_video_path}")
+         video.generate_video(
+             video_path=combined_video_path,
+             audio_path=audio_file,
+             subtitle_path=subtitle_path,
+             output_file=final_video_path,
+             params=params,
+         )
+
+         _progress += 50 / params.video_count / 2
+         sm.state.update_task(task_id, progress=_progress)
+
+         final_video_paths.append(final_video_path)
+         combined_video_paths.append(combined_video_path)
+
+     return final_video_paths, combined_video_paths
+
+
+ def start(task_id, params: VideoParams, stop_at: str = "video"):
+     logger.info(f"start task: {task_id}, stop_at: {stop_at}")
+     sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=5)
+
+     if isinstance(params.video_concat_mode, str):
+         params.video_concat_mode = VideoConcatMode(params.video_concat_mode)
+
+     # 1. Generate script
+     video_script = generate_script(task_id, params)
+     if not video_script or "Error: " in video_script:
+         sm.state.update_task(task_id, state=const.TASK_STATE_FAILED)
+         return
+
+     sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=10)
+
+     if stop_at == "script":
+         sm.state.update_task(
+             task_id, state=const.TASK_STATE_COMPLETE, progress=100, script=video_script
+         )
+         return {"script": video_script}
+
+     # 2. Generate terms
+     video_terms = ""
+     if params.video_source != "local":
+         video_terms = generate_terms(task_id, params, video_script)
+         if not video_terms:
+             sm.state.update_task(task_id, state=const.TASK_STATE_FAILED)
+             return
+
+     save_script_data(task_id, video_script, video_terms, params)
+
+     if stop_at == "terms":
+         sm.state.update_task(
+             task_id, state=const.TASK_STATE_COMPLETE, progress=100, terms=video_terms
+         )
+         return {"script": video_script, "terms": video_terms}
+
+     sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=20)
+
+     # 3. Generate audio
+     audio_file, audio_duration, sub_maker = generate_audio(
+         task_id, params, video_script
+     )
+     if not audio_file:
+         sm.state.update_task(task_id, state=const.TASK_STATE_FAILED)
+         return
+
+     sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=30)
+
+     if stop_at == "audio":
+         sm.state.update_task(
+             task_id,
+             state=const.TASK_STATE_COMPLETE,
+             progress=100,
+             audio_file=audio_file,
+         )
+         return {"audio_file": audio_file, "audio_duration": audio_duration}
+
+     # 4. Generate subtitle
+     subtitle_path = generate_subtitle(
+         task_id, params, video_script, sub_maker, audio_file
+     )
+
+     if stop_at == "subtitle":
+         sm.state.update_task(
+             task_id,
+             state=const.TASK_STATE_COMPLETE,
+             progress=100,
+             subtitle_path=subtitle_path,
+         )
+         return {"subtitle_path": subtitle_path}
+
+     sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=40)
+
+     # 5. Get video materials
+     downloaded_videos = get_video_materials(
+         task_id, params, video_terms, audio_duration
+     )
+     if not downloaded_videos:
+         sm.state.update_task(task_id, state=const.TASK_STATE_FAILED)
+         return
+
+     if stop_at == "materials":
+         sm.state.update_task(
+             task_id,
+             state=const.TASK_STATE_COMPLETE,
+             progress=100,
+             materials=downloaded_videos,
+         )
+         return {"materials": downloaded_videos}
+
+     sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=50)
+
+     # 6. Generate final videos
+     final_video_paths, combined_video_paths = generate_final_videos(
+         task_id, params, downloaded_videos, audio_file, subtitle_path
+     )
+
+     if not final_video_paths:
+         sm.state.update_task(task_id, state=const.TASK_STATE_FAILED)
+         return
+
+     logger.success(
+         f"task {task_id} finished, generated {len(final_video_paths)} videos."
+     )
+
+     kwargs = {
+         "videos": final_video_paths,
+         "combined_videos": combined_video_paths,
+         "script": video_script,
+         "terms": video_terms,
+         "audio_file": audio_file,
+         "audio_duration": audio_duration,
+         "subtitle_path": subtitle_path,
+         "materials": downloaded_videos,
+     }
+     sm.state.update_task(
+         task_id, state=const.TASK_STATE_COMPLETE, progress=100, **kwargs
+     )
+     return kwargs
+
+
+ if __name__ == "__main__":
+     task_id = "task_id"
+     params = VideoParams(
+         video_subject="金钱的作用",
+         voice_name="zh-CN-XiaoyiNeural-Female",
+         voice_rate=1.0,
+     )
+     start(task_id, params, stop_at="video")
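A sketch of driving the pipeline step by step via stop_at, assuming the VideoParams defaults are sufficient for the remaining fields; the task id and subject are made-up examples.

from app.models.schema import VideoParams
from app.services import task

params = VideoParams(video_subject="the role of money")
result = task.start("demo-task", params, stop_at="script")
if result:
    print(result["script"])  # the pipeline stopped right after the LLM script step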
app/services/utils/video_effects.py ADDED
@@ -0,0 +1,21 @@
+ from moviepy import Clip, vfx
+
+
+ # FadeIn
+ def fadein_transition(clip: Clip, t: float) -> Clip:
+     return clip.with_effects([vfx.FadeIn(t)])
+
+
+ # FadeOut
+ def fadeout_transition(clip: Clip, t: float) -> Clip:
+     return clip.with_effects([vfx.FadeOut(t)])
+
+
+ # SlideIn
+ def slidein_transition(clip: Clip, t: float, side: str) -> Clip:
+     return clip.with_effects([vfx.SlideIn(t, side)])
+
+
+ # SlideOut
+ def slideout_transition(clip: Clip, t: float, side: str) -> Clip:
+     return clip.with_effects([vfx.SlideOut(t, side)])
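A minimal sketch applying these transitions to a clip with the moviepy v2 API imported above; "input.mp4" and "output.mp4" are placeholder paths.

from moviepy import VideoFileClip

from app.services.utils import video_effects

clip = VideoFileClip("input.mp4")
clip = video_effects.fadein_transition(clip, 1.0)            # 1-second fade in
clip = video_effects.slideout_transition(clip, 1.0, "left")  # slide out to the left
clip.write_videofile("output.mp4", fps=30, logger=None)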
app/services/video.py ADDED
@@ -0,0 +1,531 @@
+ import glob
+ import itertools
+ import os
+ import random
+ import gc
+ import shutil
+ from typing import List
+ from loguru import logger
+ from moviepy import (
+     AudioFileClip,
+     ColorClip,
+     CompositeAudioClip,
+     CompositeVideoClip,
+     ImageClip,
+     TextClip,
+     VideoFileClip,
+     afx,
+     concatenate_videoclips,
+ )
+ from moviepy.video.tools.subtitles import SubtitlesClip
+ from PIL import ImageFont
+
+ from app.models import const
+ from app.models.schema import (
+     MaterialInfo,
+     VideoAspect,
+     VideoConcatMode,
+     VideoParams,
+     VideoTransitionMode,
+ )
+ from app.services.utils import video_effects
+ from app.utils import utils
+
+
+ class SubClippedVideoClip:
+     def __init__(self, file_path, start_time=None, end_time=None, width=None, height=None, duration=None):
+         self.file_path = file_path
+         self.start_time = start_time
+         self.end_time = end_time
+         self.width = width
+         self.height = height
+         if duration is None:
+             self.duration = end_time - start_time
+         else:
+             self.duration = duration
+
+     def __str__(self):
+         return f"SubClippedVideoClip(file_path={self.file_path}, start_time={self.start_time}, end_time={self.end_time}, duration={self.duration}, width={self.width}, height={self.height})"
+
+
+ audio_codec = "aac"
+ video_codec = "libx264"
+ fps = 30
+
+
+ def close_clip(clip):
+     if clip is None:
+         return
+
+     try:
+         # close main resources
+         if hasattr(clip, 'reader') and clip.reader is not None:
+             clip.reader.close()
+
+         # close audio resources
+         if hasattr(clip, 'audio') and clip.audio is not None:
+             if hasattr(clip.audio, 'reader') and clip.audio.reader is not None:
+                 clip.audio.reader.close()
+             del clip.audio
+
+         # close mask resources
+         if hasattr(clip, 'mask') and clip.mask is not None:
+             if hasattr(clip.mask, 'reader') and clip.mask.reader is not None:
+                 clip.mask.reader.close()
+             del clip.mask
+
+         # handle child clips in composite clips
+         if hasattr(clip, 'clips') and clip.clips:
+             for child_clip in clip.clips:
+                 if child_clip is not clip:  # avoid possible circular references
+                     close_clip(child_clip)
+
+         # clear clip list
+         if hasattr(clip, 'clips'):
+             clip.clips = []
+
+     except Exception as e:
+         logger.error(f"failed to close clip: {str(e)}")
+
+     del clip
+     gc.collect()
+
+
+ def delete_files(files: List[str] | str):
+     if isinstance(files, str):
+         files = [files]
+
+     for file in files:
+         try:
+             os.remove(file)
+         except OSError:
+             pass
+
+
+ def get_bgm_file(bgm_type: str = "random", bgm_file: str = ""):
+     if not bgm_type:
+         return ""
+
+     if bgm_file and os.path.exists(bgm_file):
+         return bgm_file
+
+     if bgm_type == "random":
+         suffix = "*.mp3"
+         song_dir = utils.song_dir()
+         files = glob.glob(os.path.join(song_dir, suffix))
+         return random.choice(files)
+
+     return ""
+
+
+ def combine_videos(
+     combined_video_path: str,
+     video_paths: List[str],
+     audio_file: str,
+     video_aspect: VideoAspect = VideoAspect.portrait,
+     video_concat_mode: VideoConcatMode = VideoConcatMode.random,
+     video_transition_mode: VideoTransitionMode = None,
+     max_clip_duration: int = 5,
+     threads: int = 2,
+ ) -> str:
+     audio_clip = AudioFileClip(audio_file)
+     audio_duration = audio_clip.duration
+     close_clip(audio_clip)  # opened only to read the duration
+     logger.info(f"audio duration: {audio_duration} seconds")
+     # each clip is capped at max_clip_duration seconds
+     req_dur = max_clip_duration
+     logger.info(f"maximum clip duration: {req_dur} seconds")
+     output_dir = os.path.dirname(combined_video_path)
+
+     aspect = VideoAspect(video_aspect)
+     video_width, video_height = aspect.to_resolution()
+
+     processed_clips = []
+     subclipped_items = []
+     video_duration = 0
+     for video_path in video_paths:
+         clip = VideoFileClip(video_path)
+         clip_duration = clip.duration
+         clip_w, clip_h = clip.size
+         close_clip(clip)
+
+         start_time = 0
+
+         while start_time < clip_duration:
+             end_time = min(start_time + max_clip_duration, clip_duration)
+             if clip_duration - start_time >= max_clip_duration:
+                 subclipped_items.append(SubClippedVideoClip(file_path=video_path, start_time=start_time, end_time=end_time, width=clip_w, height=clip_h))
+             start_time = end_time
+             if video_concat_mode.value == VideoConcatMode.sequential.value:
+                 break
+
+     # randomize the order of the subclipped items
+     if video_concat_mode.value == VideoConcatMode.random.value:
+         random.shuffle(subclipped_items)
+
+     logger.debug(f"total subclipped items: {len(subclipped_items)}")
+
+     # add downloaded clips over and over until the duration of the audio has been reached
+     for i, subclipped_item in enumerate(subclipped_items):
+         if video_duration > audio_duration:
+             break
+
+         logger.debug(f"processing clip {i+1}: {subclipped_item.width}x{subclipped_item.height}, current duration: {video_duration:.2f}s, remaining: {audio_duration - video_duration:.2f}s")
+
+         try:
+             clip = VideoFileClip(subclipped_item.file_path).subclipped(subclipped_item.start_time, subclipped_item.end_time)
+             clip_duration = clip.duration
+             # not all videos are the same size, so resize when needed
+             clip_w, clip_h = clip.size
+             if clip_w != video_width or clip_h != video_height:
+                 clip_ratio = clip.w / clip.h
+                 video_ratio = video_width / video_height
+                 logger.debug(f"resizing clip, source: {clip_w}x{clip_h}, ratio: {clip_ratio:.2f}, target: {video_width}x{video_height}, ratio: {video_ratio:.2f}")
+
+                 if clip_ratio == video_ratio:
+                     clip = clip.resized(new_size=(video_width, video_height))
+                 else:
+                     # scale by the limiting dimension, then center on a black background
+                     if clip_ratio > video_ratio:
+                         scale_factor = video_width / clip_w
+                     else:
+                         scale_factor = video_height / clip_h
+
+                     new_width = int(clip_w * scale_factor)
+                     new_height = int(clip_h * scale_factor)
+
+                     background = ColorClip(size=(video_width, video_height), color=(0, 0, 0)).with_duration(clip_duration)
+                     clip_resized = clip.resized(new_size=(new_width, new_height)).with_position("center")
+                     clip = CompositeVideoClip([background, clip_resized])
+
+             shuffle_side = random.choice(["left", "right", "top", "bottom"])
+             if video_transition_mode is None or video_transition_mode.value == VideoTransitionMode.none.value:
+                 pass
+             elif video_transition_mode.value == VideoTransitionMode.fade_in.value:
+                 clip = video_effects.fadein_transition(clip, 1)
+             elif video_transition_mode.value == VideoTransitionMode.fade_out.value:
+                 clip = video_effects.fadeout_transition(clip, 1)
+             elif video_transition_mode.value == VideoTransitionMode.slide_in.value:
+                 clip = video_effects.slidein_transition(clip, 1, shuffle_side)
+             elif video_transition_mode.value == VideoTransitionMode.slide_out.value:
+                 clip = video_effects.slideout_transition(clip, 1, shuffle_side)
+             elif video_transition_mode.value == VideoTransitionMode.shuffle.value:
+                 transition_funcs = [
+                     lambda c: video_effects.fadein_transition(c, 1),
+                     lambda c: video_effects.fadeout_transition(c, 1),
+                     lambda c: video_effects.slidein_transition(c, 1, shuffle_side),
+                     lambda c: video_effects.slideout_transition(c, 1, shuffle_side),
+                 ]
+                 shuffle_transition = random.choice(transition_funcs)
+                 clip = shuffle_transition(clip)
+
+             if clip.duration > max_clip_duration:
+                 clip = clip.subclipped(0, max_clip_duration)
+
+             # write the processed clip to a temp file
+             clip_file = f"{output_dir}/temp-clip-{i+1}.mp4"
+             clip.write_videofile(clip_file, logger=None, fps=fps, codec=video_codec)
+
+             processed_clips.append(SubClippedVideoClip(file_path=clip_file, duration=clip.duration, width=clip_w, height=clip_h))
+             video_duration += clip.duration
+             close_clip(clip)
+
+         except Exception as e:
+             logger.error(f"failed to process clip: {str(e)}")
+
+     # loop processed clips until the video duration matches or exceeds the audio duration
+     if video_duration < audio_duration:
+         logger.warning(f"video duration ({video_duration:.2f}s) is shorter than audio duration ({audio_duration:.2f}s), looping clips to match audio length.")
+         base_clips = processed_clips.copy()
+         for clip in itertools.cycle(base_clips):
+             if video_duration >= audio_duration:
+                 break
+             processed_clips.append(clip)
+             video_duration += clip.duration
+         logger.info(f"video duration: {video_duration:.2f}s, audio duration: {audio_duration:.2f}s, looped {len(processed_clips)-len(base_clips)} clips")
+
+     # merge video clips progressively, avoiding loading all videos at once to prevent memory overflow
+     logger.info("starting clip merging process")
+     if not processed_clips:
+         logger.warning("no clips available for merging")
+         return combined_video_path
+
+     # if there is only one clip, use it directly
+     if len(processed_clips) == 1:
+         logger.info("using single clip directly")
+         shutil.copy(processed_clips[0].file_path, combined_video_path)
+         delete_files([clip.file_path for clip in processed_clips])
+         logger.info("video combining completed")
+         return combined_video_path
+
+     # create the initial video file as the base
+     base_clip_path = processed_clips[0].file_path
+     temp_merged_video = f"{output_dir}/temp-merged-video.mp4"
+     temp_merged_next = f"{output_dir}/temp-merged-next.mp4"
+
+     # copy the first clip as the initial merged video
+     shutil.copy(base_clip_path, temp_merged_video)
+
+     # merge the remaining video clips one by one
+     for i, clip in enumerate(processed_clips[1:], 1):
+         logger.info(f"merging clip {i}/{len(processed_clips)-1}, duration: {clip.duration:.2f}s")
+
+         try:
+             # load the current base video and the next clip to merge
+             base_clip = VideoFileClip(temp_merged_video)
+             next_clip = VideoFileClip(clip.file_path)
+
+             # merge these two clips
+             merged_clip = concatenate_videoclips([base_clip, next_clip])
+
+             # save the merged result to a temp file
+             merged_clip.write_videofile(
+                 filename=temp_merged_next,
+                 threads=threads,
+                 logger=None,
+                 temp_audiofile_path=output_dir,
+                 audio_codec=audio_codec,
+                 fps=fps,
+             )
+             close_clip(base_clip)
+             close_clip(next_clip)
+             close_clip(merged_clip)
+
+             # replace the base file with the new merged file
+             delete_files(temp_merged_video)
+             os.rename(temp_merged_next, temp_merged_video)
+
+         except Exception as e:
+             logger.error(f"failed to merge clip: {str(e)}")
+             continue
+
+     # after merging, rename the final result to the target file name
+     os.rename(temp_merged_video, combined_video_path)
+
+     # clean up temp files
+     clip_files = [clip.file_path for clip in processed_clips]
+     delete_files(clip_files)
+
+     logger.info("video combining completed")
+     return combined_video_path
+
+
+ def wrap_text(text, max_width, font="Arial", fontsize=60):
+     # Create the ImageFont used for measuring rendered text width
+     font = ImageFont.truetype(font, fontsize)
+
+     def get_text_size(inner_text):
+         inner_text = inner_text.strip()
+         left, top, right, bottom = font.getbbox(inner_text)
+         return right - left, bottom - top
+
+     width, height = get_text_size(text)
+     if width <= max_width:
+         return text, height
+
+     processed = True
+
+     _wrapped_lines_ = []
+     words = text.split(" ")
+     _txt_ = ""
+     for word in words:
+         _before = _txt_
+         _txt_ += f"{word} "
+         _width, _height = get_text_size(_txt_)
+         if _width <= max_width:
+             continue
+         else:
+             if _txt_.strip() == word.strip():
+                 processed = False
+                 break
+             _wrapped_lines_.append(_before)
+             _txt_ = f"{word} "
+     _wrapped_lines_.append(_txt_)
+     if processed:
+         _wrapped_lines_ = [line.strip() for line in _wrapped_lines_]
+         result = "\n".join(_wrapped_lines_).strip()
+         height = len(_wrapped_lines_) * height
+         return result, height
+
+     # fall back to per-character wrapping (e.g. for scripts without spaces)
+     _wrapped_lines_ = []
+     chars = list(text)
+     _txt_ = ""
+     for word in chars:
+         _txt_ += word
+         _width, _height = get_text_size(_txt_)
+         if _width <= max_width:
+             continue
+         else:
+             _wrapped_lines_.append(_txt_)
+             _txt_ = ""
+     _wrapped_lines_.append(_txt_)
+     result = "\n".join(_wrapped_lines_).strip()
+     height = len(_wrapped_lines_) * height
+     return result, height
+
+
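A small sketch of calling wrap_text: it greedily packs words until the rendered width (measured with the real font) would exceed max_width, then falls back to per-character wrapping for text without spaces. "font.ttf" is a placeholder; any TrueType font file will do.

wrapped, line_height = wrap_text(
    "a fairly long subtitle line that will not fit on one row",
    max_width=400,
    font="font.ttf",  # placeholder path to a .ttf/.ttc file
    fontsize=60,
)
print(wrapped)  # the same text with newlines inserted at word boundaries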
+ def generate_video(
+     video_path: str,
+     audio_path: str,
+     subtitle_path: str,
+     output_file: str,
+     params: VideoParams,
+ ):
+     aspect = VideoAspect(params.video_aspect)
+     video_width, video_height = aspect.to_resolution()
+
+     logger.info(f"generating video: {video_width} x {video_height}")
+     logger.info(f" ① video: {video_path}")
+     logger.info(f" ② audio: {audio_path}")
+     logger.info(f" ③ subtitle: {subtitle_path}")
+     logger.info(f" ④ output: {output_file}")
+
+     # https://github.com/harry0703/MoneyPrinterTurbo/issues/217
+     # PermissionError: [WinError 32] The process cannot access the file because it is being used by another process: 'final-1.mp4.tempTEMP_MPY_wvf_snd.mp3'
+     # write the temp audio file into the same directory as the output file
+     output_dir = os.path.dirname(output_file)
+
+     font_path = ""
+     if params.subtitle_enabled:
+         if not params.font_name:
+             params.font_name = "STHeitiMedium.ttc"
+         font_path = os.path.join(utils.font_dir(), params.font_name)
+         if os.name == "nt":
+             font_path = font_path.replace("\\", "/")
+
+         logger.info(f" ⑤ font: {font_path}")
+
+     def create_text_clip(subtitle_item):
+         params.font_size = int(params.font_size)
+         params.stroke_width = int(params.stroke_width)
+         phrase = subtitle_item[1]
+         max_width = video_width * 0.9
+         wrapped_txt, txt_height = wrap_text(
+             phrase, max_width=max_width, font=font_path, fontsize=params.font_size
+         )
+         interline = int(params.font_size * 0.25)
+         size = (
+             int(max_width),
+             int(txt_height + params.font_size * 0.25 + (interline * (wrapped_txt.count("\n") + 1))),
+         )
+
+         _clip = TextClip(
+             text=wrapped_txt,
+             font=font_path,
+             font_size=params.font_size,
+             color=params.text_fore_color,
+             bg_color=params.text_background_color,
+             stroke_color=params.stroke_color,
+             stroke_width=params.stroke_width,
+             # interline=interline,
+             # size=size,
+         )
+         duration = subtitle_item[0][1] - subtitle_item[0][0]
+         _clip = _clip.with_start(subtitle_item[0][0])
+         _clip = _clip.with_end(subtitle_item[0][1])
+         _clip = _clip.with_duration(duration)
+         if params.subtitle_position == "bottom":
+             _clip = _clip.with_position(("center", video_height * 0.95 - _clip.h))
+         elif params.subtitle_position == "top":
+             _clip = _clip.with_position(("center", video_height * 0.05))
+         elif params.subtitle_position == "custom":
+             # Ensure the subtitle is fully within the screen bounds
+             margin = 10  # Additional margin, in pixels
+             max_y = video_height - _clip.h - margin
+             min_y = margin
+             custom_y = (video_height - _clip.h) * (params.custom_position / 100)
+             custom_y = max(
+                 min_y, min(custom_y, max_y)
+             )  # Constrain the y value within the valid range
+             _clip = _clip.with_position(("center", custom_y))
+         else:  # center
+             _clip = _clip.with_position(("center", "center"))
+         return _clip
+
+     video_clip = VideoFileClip(video_path).without_audio()
+     audio_clip = AudioFileClip(audio_path).with_effects(
+         [afx.MultiplyVolume(params.voice_volume)]
+     )
+
+     def make_textclip(text):
+         return TextClip(
+             text=text,
+             font=font_path,
+             font_size=params.font_size,
+         )
+
+     if subtitle_path and os.path.exists(subtitle_path):
+         sub = SubtitlesClip(
+             subtitles=subtitle_path, encoding="utf-8", make_textclip=make_textclip
+         )
+         text_clips = []
+         for item in sub.subtitles:
+             clip = create_text_clip(subtitle_item=item)
+             text_clips.append(clip)
+         video_clip = CompositeVideoClip([video_clip, *text_clips])
+
+     bgm_file = get_bgm_file(bgm_type=params.bgm_type, bgm_file=params.bgm_file)
+     if bgm_file:
+         try:
+             bgm_clip = AudioFileClip(bgm_file).with_effects(
+                 [
+                     afx.MultiplyVolume(params.bgm_volume),
+                     afx.AudioFadeOut(3),
+                     afx.AudioLoop(duration=video_clip.duration),
+                 ]
+             )
+             audio_clip = CompositeAudioClip([audio_clip, bgm_clip])
+         except Exception as e:
+             logger.error(f"failed to add bgm: {str(e)}")
+
+     video_clip = video_clip.with_audio(audio_clip)
+     video_clip.write_videofile(
+         output_file,
+         audio_codec=audio_codec,
+         temp_audiofile_path=output_dir,
+         threads=params.n_threads or 2,
+         logger=None,
+         fps=fps,
+     )
+     video_clip.close()
+     del video_clip
+
+
+ def preprocess_video(materials: List[MaterialInfo], clip_duration=4):
+     for material in materials:
+         if not material.url:
+             continue
+
+         ext = utils.parse_extension(material.url)
+         try:
+             clip = VideoFileClip(material.url)
+         except Exception:
+             clip = ImageClip(material.url)
+
+         width = clip.size[0]
+         height = clip.size[1]
+         if width < 480 or height < 480:
+             logger.warning(f"low resolution material: {width}x{height}, minimum 480x480 required")
+             close_clip(clip)
+             continue
+
+         if ext in const.FILE_TYPE_IMAGES:
+             logger.info(f"processing image: {material.url}")
+             # Create an image clip and set its duration to the target clip duration
+             clip = (
+                 ImageClip(material.url)
+                 .with_duration(clip_duration)
+                 .with_position("center")
+             )
+             # Apply a zoom effect using the resized method.
+             # A lambda makes the zoom dynamic over time: the clip starts at its
+             # original size and gradually scales up as t approaches clip.duration.
+             # Note: 1 represents 100% size, so 1.2 would represent 120% size.
+             zoom_clip = clip.resized(
+                 lambda t: 1 + (clip_duration * 0.03) * (t / clip.duration)
+             )
+
+             # Optionally, wrap the zoomed clip in a composite video clip.
+             # This is useful when other elements need to be added to the video.
+             final_clip = CompositeVideoClip([zoom_clip])
+
+             # Write the video to a file next to the source image.
+             video_file = f"{material.url}.mp4"
+             final_clip.write_videofile(video_file, fps=30, logger=None)
+             close_clip(clip)
+             material.url = video_file
+             logger.success(f"image processed: {video_file}")
+     return materials
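The resize branch in combine_videos scales by the limiting dimension and centers the clip on a black background of the target size. Worked through with made-up numbers, a 1280x720 landscape source inside a 1080x1920 portrait frame:

clip_w, clip_h = 1280, 720
video_width, video_height = 1080, 1920
if clip_w / clip_h > video_width / video_height:
    scale_factor = video_width / clip_w   # width-limited: 1080 / 1280 = 0.84375
else:
    scale_factor = video_height / clip_h
new_size = (int(clip_w * scale_factor), int(clip_h * scale_factor))
print(new_size)  # (1080, 607): letterboxed top and bottom inside 1080x1920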
app/services/voice.py ADDED
@@ -0,0 +1,1566 @@
+ import asyncio
+ import os
+ import re
+ from datetime import datetime
+ from typing import Union
+ from xml.sax.saxutils import unescape
+
+ import edge_tts
+ import requests
+ from edge_tts import SubMaker, submaker
+ from edge_tts.submaker import mktimestamp
+ from loguru import logger
+ from moviepy.video.tools import subtitles
+
+ from app.config import config
+ from app.utils import utils
+
+
+ def get_siliconflow_voices() -> list[str]:
+     """
+     Get the list of SiliconFlow voices.
+
+     Returns:
+         A list of voice names, e.g.
+         ["siliconflow:FunAudioLLM/CosyVoice2-0.5B:alex", ...]
+     """
+     # SiliconFlow voices and their gender (the gender is only used for display)
+     voices_with_gender = [
+         ("FunAudioLLM/CosyVoice2-0.5B", "alex", "Male"),
+         ("FunAudioLLM/CosyVoice2-0.5B", "anna", "Female"),
+         ("FunAudioLLM/CosyVoice2-0.5B", "bella", "Female"),
+         ("FunAudioLLM/CosyVoice2-0.5B", "benjamin", "Male"),
+         ("FunAudioLLM/CosyVoice2-0.5B", "charles", "Male"),
+         ("FunAudioLLM/CosyVoice2-0.5B", "claire", "Female"),
+         ("FunAudioLLM/CosyVoice2-0.5B", "david", "Male"),
+         ("FunAudioLLM/CosyVoice2-0.5B", "diana", "Female"),
+     ]
+
+     # Prepend the "siliconflow:" prefix and format the display name
+     return [
+         f"siliconflow:{model}:{voice}-{gender}"
+         for model, voice, gender in voices_with_gender
+     ]
+
+
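The entries returned above pack provider, model, and voice into one display string. A sketch of splitting such a name back apart; the format is inferred from the code above, not from a published spec.

name = "siliconflow:FunAudioLLM/CosyVoice2-0.5B:alex-Male"
provider, model, voice_and_gender = name.split(":", 2)
voice = voice_and_gender.rsplit("-", 1)[0]
print(provider, model, voice)  # siliconflow FunAudioLLM/CosyVoice2-0.5B alex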
+ def get_all_azure_voices(filter_locals=None) -> list[str]:
+     azure_voices_str = """
+     Name: af-ZA-AdriNeural
+     Gender: Female
+
+     Name: af-ZA-WillemNeural
+     Gender: Male
+
+     Name: am-ET-AmehaNeural
+     Gender: Male
+
+     Name: am-ET-MekdesNeural
+     Gender: Female
+
+     Name: ar-AE-FatimaNeural
+     Gender: Female
+
+     Name: ar-AE-HamdanNeural
+     Gender: Male
+
+     Name: ar-BH-AliNeural
+     Gender: Male
+
+     Name: ar-BH-LailaNeural
+     Gender: Female
+
+     Name: ar-DZ-AminaNeural
+     Gender: Female
+
+     Name: ar-DZ-IsmaelNeural
+     Gender: Male
+
+     Name: ar-EG-SalmaNeural
+     Gender: Female
+
+     Name: ar-EG-ShakirNeural
+     Gender: Male
+
+     Name: ar-IQ-BasselNeural
+     Gender: Male
+
+     Name: ar-IQ-RanaNeural
+     Gender: Female
+
+     Name: ar-JO-SanaNeural
+     Gender: Female
+
+     Name: ar-JO-TaimNeural
+     Gender: Male
+
+     Name: ar-KW-FahedNeural
+     Gender: Male
+
+     Name: ar-KW-NouraNeural
+     Gender: Female
+
+     Name: ar-LB-LaylaNeural
+     Gender: Female
+
+     Name: ar-LB-RamiNeural
+     Gender: Male
+
+     Name: ar-LY-ImanNeural
+     Gender: Female
+
+     Name: ar-LY-OmarNeural
+     Gender: Male
+
+     Name: ar-MA-JamalNeural
+     Gender: Male
+
+     Name: ar-MA-MounaNeural
+     Gender: Female
+
+     Name: ar-OM-AbdullahNeural
+     Gender: Male
+
+     Name: ar-OM-AyshaNeural
+     Gender: Female
+
+     Name: ar-QA-AmalNeural
+     Gender: Female
+
+     Name: ar-QA-MoazNeural
+     Gender: Male
+
+     Name: ar-SA-HamedNeural
+     Gender: Male
+
+     Name: ar-SA-ZariyahNeural
+     Gender: Female
+
+     Name: ar-SY-AmanyNeural
+     Gender: Female
+
+     Name: ar-SY-LaithNeural
+     Gender: Male
+
+     Name: ar-TN-HediNeural
+     Gender: Male
+
+     Name: ar-TN-ReemNeural
+     Gender: Female
+
+     Name: ar-YE-MaryamNeural
+     Gender: Female
+
+     Name: ar-YE-SalehNeural
+     Gender: Male
+
+     Name: az-AZ-BabekNeural
+     Gender: Male
+
+     Name: az-AZ-BanuNeural
+     Gender: Female
+
+     Name: bg-BG-BorislavNeural
+     Gender: Male
+
+     Name: bg-BG-KalinaNeural
+     Gender: Female
+
+     Name: bn-BD-NabanitaNeural
+     Gender: Female
+
+     Name: bn-BD-PradeepNeural
+     Gender: Male
+
+     Name: bn-IN-BashkarNeural
+     Gender: Male
+
+     Name: bn-IN-TanishaaNeural
+     Gender: Female
+
+     Name: bs-BA-GoranNeural
+     Gender: Male
+
+     Name: bs-BA-VesnaNeural
+     Gender: Female
+
+     Name: ca-ES-EnricNeural
+     Gender: Male
+
+     Name: ca-ES-JoanaNeural
+     Gender: Female
+
+     Name: cs-CZ-AntoninNeural
+     Gender: Male
+
+     Name: cs-CZ-VlastaNeural
+     Gender: Female
+
+     Name: cy-GB-AledNeural
+     Gender: Male
+
+     Name: cy-GB-NiaNeural
+     Gender: Female
+
+     Name: da-DK-ChristelNeural
+     Gender: Female
+
+     Name: da-DK-JeppeNeural
+     Gender: Male
+
+     Name: de-AT-IngridNeural
+     Gender: Female
+
+     Name: de-AT-JonasNeural
+     Gender: Male
+
+     Name: de-CH-JanNeural
+     Gender: Male
+
+     Name: de-CH-LeniNeural
+     Gender: Female
+
+     Name: de-DE-AmalaNeural
+     Gender: Female
+
+     Name: de-DE-ConradNeural
+     Gender: Male
+
+     Name: de-DE-FlorianMultilingualNeural
+     Gender: Male
+
+     Name: de-DE-KatjaNeural
+     Gender: Female
+
+     Name: de-DE-KillianNeural
+     Gender: Male
+
+     Name: de-DE-SeraphinaMultilingualNeural
+     Gender: Female
+
+     Name: el-GR-AthinaNeural
+     Gender: Female
+
+     Name: el-GR-NestorasNeural
+     Gender: Male
+
+     Name: en-AU-NatashaNeural
+     Gender: Female
+
+     Name: en-AU-WilliamNeural
+     Gender: Male
+
+     Name: en-CA-ClaraNeural
+     Gender: Female
+
+     Name: en-CA-LiamNeural
+     Gender: Male
+
+     Name: en-GB-LibbyNeural
+     Gender: Female
+
+     Name: en-GB-MaisieNeural
+     Gender: Female
+
+     Name: en-GB-RyanNeural
+     Gender: Male
+
+     Name: en-GB-SoniaNeural
+     Gender: Female
+
+     Name: en-GB-ThomasNeural
+     Gender: Male
+
+     Name: en-HK-SamNeural
+     Gender: Male
+
+     Name: en-HK-YanNeural
+     Gender: Female
+
+     Name: en-IE-ConnorNeural
+     Gender: Male
+
+     Name: en-IE-EmilyNeural
+     Gender: Female
+
+     Name: en-IN-NeerjaExpressiveNeural
+     Gender: Female
+
+     Name: en-IN-NeerjaNeural
+     Gender: Female
+
+     Name: en-IN-PrabhatNeural
+     Gender: Male
+
+     Name: en-KE-AsiliaNeural
+     Gender: Female
+
+     Name: en-KE-ChilembaNeural
+     Gender: Male
+
+     Name: en-NG-AbeoNeural
+     Gender: Male
+
+     Name: en-NG-EzinneNeural
+     Gender: Female
+
+     Name: en-NZ-MitchellNeural
+     Gender: Male
+
+     Name: en-NZ-MollyNeural
+     Gender: Female
+
+     Name: en-PH-JamesNeural
+     Gender: Male
+
+     Name: en-PH-RosaNeural
+     Gender: Female
+
+     Name: en-SG-LunaNeural
+     Gender: Female
+
+     Name: en-SG-WayneNeural
+     Gender: Male
+
+     Name: en-TZ-ElimuNeural
+     Gender: Male
+
+     Name: en-TZ-ImaniNeural
+     Gender: Female
+
+     Name: en-US-AnaNeural
+     Gender: Female
+
+     Name: en-US-AndrewMultilingualNeural
+     Gender: Male
+
+     Name: en-US-AndrewNeural
+     Gender: Male
+
+     Name: en-US-AriaNeural
+     Gender: Female
+
+     Name: en-US-AvaMultilingualNeural
+     Gender: Female
+
+     Name: en-US-AvaNeural
+     Gender: Female
+
+     Name: en-US-BrianMultilingualNeural
+     Gender: Male
+
+     Name: en-US-BrianNeural
+     Gender: Male
+
+     Name: en-US-ChristopherNeural
+     Gender: Male
+
+     Name: en-US-EmmaMultilingualNeural
+     Gender: Female
+
+     Name: en-US-EmmaNeural
+     Gender: Female
+
+     Name: en-US-EricNeural
+     Gender: Male
+
+     Name: en-US-GuyNeural
+     Gender: Male
+
+     Name: en-US-JennyNeural
+     Gender: Female
+
+     Name: en-US-MichelleNeural
+     Gender: Female
+
+     Name: en-US-RogerNeural
+     Gender: Male
+
+     Name: en-US-SteffanNeural
+     Gender: Male
+
+     Name: en-ZA-LeahNeural
+     Gender: Female
+
+     Name: en-ZA-LukeNeural
+     Gender: Male
+
+     Name: es-AR-ElenaNeural
+     Gender: Female
+
+     Name: es-AR-TomasNeural
+     Gender: Male
+
+     Name: es-BO-MarceloNeural
+     Gender: Male
+
+     Name: es-BO-SofiaNeural
+     Gender: Female
+
+     Name: es-CL-CatalinaNeural
+     Gender: Female
+
+     Name: es-CL-LorenzoNeural
+     Gender: Male
+
+     Name: es-CO-GonzaloNeural
+     Gender: Male
+
+     Name: es-CO-SalomeNeural
+     Gender: Female
+
+     Name: es-CR-JuanNeural
+     Gender: Male
+
+     Name: es-CR-MariaNeural
+     Gender: Female
+
+     Name: es-CU-BelkysNeural
+     Gender: Female
+
+     Name: es-CU-ManuelNeural
+     Gender: Male
+
+     Name: es-DO-EmilioNeural
+     Gender: Male
+
+     Name: es-DO-RamonaNeural
+     Gender: Female
+
+     Name: es-EC-AndreaNeural
+     Gender: Female
+
+     Name: es-EC-LuisNeural
+     Gender: Male
+
+     Name: es-ES-AlvaroNeural
+     Gender: Male
+
+     Name: es-ES-ElviraNeural
+     Gender: Female
+
+     Name: es-ES-XimenaNeural
+     Gender: Female
+
+     Name: es-GQ-JavierNeural
+     Gender: Male
+
+     Name: es-GQ-TeresaNeural
+     Gender: Female
+
+     Name: es-GT-AndresNeural
+     Gender: Male
+
+     Name: es-GT-MartaNeural
+     Gender: Female
+
+     Name: es-HN-CarlosNeural
+     Gender: Male
+
+     Name: es-HN-KarlaNeural
+     Gender: Female
+
+     Name: es-MX-DaliaNeural
+     Gender: Female
+
+     Name: es-MX-JorgeNeural
+     Gender: Male
+
+     Name: es-NI-FedericoNeural
+     Gender: Male
+
+     Name: es-NI-YolandaNeural
+     Gender: Female
+
+     Name: es-PA-MargaritaNeural
+     Gender: Female
+
+     Name: es-PA-RobertoNeural
+     Gender: Male
+
+     Name: es-PE-AlexNeural
+     Gender: Male
+
+     Name: es-PE-CamilaNeural
+     Gender: Female
+
+     Name: es-PR-KarinaNeural
+     Gender: Female
+
+     Name: es-PR-VictorNeural
+     Gender: Male
+
+     Name: es-PY-MarioNeural
+     Gender: Male
+
+     Name: es-PY-TaniaNeural
+     Gender: Female
+
+     Name: es-SV-LorenaNeural
+     Gender: Female
+
+     Name: es-SV-RodrigoNeural
+     Gender: Male
+
+     Name: es-US-AlonsoNeural
+     Gender: Male
+
+     Name: es-US-PalomaNeural
+     Gender: Female
+
+     Name: es-UY-MateoNeural
+     Gender: Male
+
+     Name: es-UY-ValentinaNeural
+     Gender: Female
+
+     Name: es-VE-PaolaNeural
+     Gender: Female
+
+     Name: es-VE-SebastianNeural
+     Gender: Male
+
+     Name: et-EE-AnuNeural
+     Gender: Female
+
+     Name: et-EE-KertNeural
+     Gender: Male
+
+     Name: fa-IR-DilaraNeural
+     Gender: Female
+
+     Name: fa-IR-FaridNeural
+     Gender: Male
+
+     Name: fi-FI-HarriNeural
+     Gender: Male
+
+     Name: fi-FI-NooraNeural
+     Gender: Female
+
+     Name: fil-PH-AngeloNeural
+     Gender: Male
+
+     Name: fil-PH-BlessicaNeural
+     Gender: Female
+
+     Name: fr-BE-CharlineNeural
+     Gender: Female
+
+     Name: fr-BE-GerardNeural
+     Gender: Male
+
+     Name: fr-CA-AntoineNeural
+     Gender: Male
+
+     Name: fr-CA-JeanNeural
+     Gender: Male
+
+     Name: fr-CA-SylvieNeural
+     Gender: Female
+
+     Name: fr-CA-ThierryNeural
+     Gender: Male
+
+     Name: fr-CH-ArianeNeural
+     Gender: Female
+
+     Name: fr-CH-FabriceNeural
+     Gender: Male
+
+     Name: fr-FR-DeniseNeural
+     Gender: Female
+
+     Name: fr-FR-EloiseNeural
+     Gender: Female
+
+     Name: fr-FR-HenriNeural
+     Gender: Male
+
+     Name: fr-FR-RemyMultilingualNeural
+     Gender: Male
+
+     Name: fr-FR-VivienneMultilingualNeural
+     Gender: Female
+
+     Name: ga-IE-ColmNeural
+     Gender: Male
+
+     Name: ga-IE-OrlaNeural
+     Gender: Female
+
+     Name: gl-ES-RoiNeural
+     Gender: Male
+
+     Name: gl-ES-SabelaNeural
+     Gender: Female
+
+     Name: gu-IN-DhwaniNeural
+     Gender: Female
+
+     Name: gu-IN-NiranjanNeural
+     Gender: Male
+
+     Name: he-IL-AvriNeural
+     Gender: Male
+
+     Name: he-IL-HilaNeural
+     Gender: Female
+
+     Name: hi-IN-MadhurNeural
+     Gender: Male
+
+     Name: hi-IN-SwaraNeural
+     Gender: Female
+
+     Name: hr-HR-GabrijelaNeural
+     Gender: Female
+
+     Name: hr-HR-SreckoNeural
+     Gender: Male
+
+     Name: hu-HU-NoemiNeural
+     Gender: Female
+
+     Name: hu-HU-TamasNeural
+     Gender: Male
+
+     Name: id-ID-ArdiNeural
+     Gender: Male
+
+     Name: id-ID-GadisNeural
+     Gender: Female
+
+     Name: is-IS-GudrunNeural
+     Gender: Female
+
+     Name: is-IS-GunnarNeural
+     Gender: Male
+
+     Name: it-IT-DiegoNeural
+     Gender: Male
+
+     Name: it-IT-ElsaNeural
+     Gender: Female
+
+     Name: it-IT-GiuseppeMultilingualNeural
+     Gender: Male
+
+     Name: it-IT-IsabellaNeural
+     Gender: Female
+
+     Name: iu-Cans-CA-SiqiniqNeural
+     Gender: Female
+
+     Name: iu-Cans-CA-TaqqiqNeural
+     Gender: Male
+
+     Name: iu-Latn-CA-SiqiniqNeural
+     Gender: Female
+
+     Name: iu-Latn-CA-TaqqiqNeural
+     Gender: Male
+
+     Name: ja-JP-KeitaNeural
+     Gender: Male
+
+     Name: ja-JP-NanamiNeural
+     Gender: Female
+
+     Name: jv-ID-DimasNeural
+     Gender: Male
+
+     Name: jv-ID-SitiNeural
+     Gender: Female
+
+     Name: ka-GE-EkaNeural
+     Gender: Female
+
+     Name: ka-GE-GiorgiNeural
+     Gender: Male
+
+     Name: kk-KZ-AigulNeural
+     Gender: Female
+
+     Name: kk-KZ-DauletNeural
+     Gender: Male
+
+     Name: km-KH-PisethNeural
+     Gender: Male
+
+     Name: km-KH-SreymomNeural
+     Gender: Female
+
+     Name: kn-IN-GaganNeural
+     Gender: Male
+
+     Name: kn-IN-SapnaNeural
+     Gender: Female
+
+     Name: ko-KR-HyunsuMultilingualNeural
+     Gender: Male
+
+     Name: ko-KR-InJoonNeural
+     Gender: Male
+
+     Name: ko-KR-SunHiNeural
+     Gender: Female
+
+     Name: lo-LA-ChanthavongNeural
+     Gender: Male
+
+     Name: lo-LA-KeomanyNeural
+     Gender: Female
+
+     Name: lt-LT-LeonasNeural
+     Gender: Male
+
+     Name: lt-LT-OnaNeural
+     Gender: Female
+
+     Name: lv-LV-EveritaNeural
+     Gender: Female
+
+     Name: lv-LV-NilsNeural
+     Gender: Male
+
+     Name: mk-MK-AleksandarNeural
+     Gender: Male
+
+     Name: mk-MK-MarijaNeural
+     Gender: Female
+
+     Name: ml-IN-MidhunNeural
+     Gender: Male
+
+     Name: ml-IN-SobhanaNeural
+     Gender: Female
+
+     Name: mn-MN-BataaNeural
+     Gender: Male
+
+     Name: mn-MN-YesuiNeural
+     Gender: Female
+
+     Name: mr-IN-AarohiNeural
+     Gender: Female
+
+     Name: mr-IN-ManoharNeural
+     Gender: Male
+
+     Name: ms-MY-OsmanNeural
+     Gender: Male
+
+     Name: ms-MY-YasminNeural
+     Gender: Female
+
+     Name: mt-MT-GraceNeural
+     Gender: Female
+
+     Name: mt-MT-JosephNeural
+     Gender: Male
+
+     Name: my-MM-NilarNeural
+     Gender: Female
+
+     Name: my-MM-ThihaNeural
+     Gender: Male
+
+     Name: nb-NO-FinnNeural
+     Gender: Male
+
+     Name: nb-NO-PernilleNeural
+     Gender: Female
+
+     Name: ne-NP-HemkalaNeural
+     Gender: Female
+
+     Name: ne-NP-SagarNeural
+     Gender: Male
+
+     Name: nl-BE-ArnaudNeural
+     Gender: Male
+
+     Name: nl-BE-DenaNeural
+     Gender: Female
+
+     Name: nl-NL-ColetteNeural
+     Gender: Female
+
+     Name: nl-NL-FennaNeural
+     Gender: Female
+
+     Name: nl-NL-MaartenNeural
+     Gender: Male
+
+     Name: pl-PL-MarekNeural
+     Gender: Male
+
+     Name: pl-PL-ZofiaNeural
+     Gender: Female
+
+     Name: ps-AF-GulNawazNeural
+     Gender: Male
+
+     Name: ps-AF-LatifaNeural
+     Gender: Female
+
+     Name: pt-BR-AntonioNeural
+     Gender: Male
+
+     Name: pt-BR-FranciscaNeural
+     Gender: Female
+
+     Name: pt-BR-ThalitaMultilingualNeural
+     Gender: Female
+
+     Name: pt-PT-DuarteNeural
+     Gender: Male
+
+     Name: pt-PT-RaquelNeural
+     Gender: Female
+
+     Name: ro-RO-AlinaNeural
+     Gender: Female
+
+     Name: ro-RO-EmilNeural
+     Gender: Male
+
+     Name: ru-RU-DmitryNeural
+     Gender: Male
+
+     Name: ru-RU-SvetlanaNeural
+     Gender: Female
+
+     Name: si-LK-SameeraNeural
+     Gender: Male
+
+     Name: si-LK-ThiliniNeural
+     Gender: Female
+
+     Name: sk-SK-LukasNeural
+     Gender: Male
+
+     Name: sk-SK-ViktoriaNeural
+     Gender: Female
+
+     Name: sl-SI-PetraNeural
+     Gender: Female
+
+     Name: sl-SI-RokNeural
+     Gender: Male
+
+     Name: so-SO-MuuseNeural
+     Gender: Male
+
+     Name: so-SO-UbaxNeural
+     Gender: Female
+
+     Name: sq-AL-AnilaNeural
+     Gender: Female
+
+     Name: sq-AL-IlirNeural
+     Gender: Male
+
+     Name: sr-RS-NicholasNeural
+     Gender: Male
+
+     Name: sr-RS-SophieNeural
+     Gender: Female
+
+     Name: su-ID-JajangNeural
+     Gender: Male
+
+     Name: su-ID-TutiNeural
+     Gender: Female
+
+     Name: sv-SE-MattiasNeural
+     Gender: Male
+
+     Name: sv-SE-SofieNeural
+     Gender: Female
+
+     Name: sw-KE-RafikiNeural
+     Gender: Male
+
+     Name: sw-KE-ZuriNeural
+     Gender: Female
+
+     Name: sw-TZ-DaudiNeural
+     Gender: Male
+
+     Name: sw-TZ-RehemaNeural
+     Gender: Female
+
+     Name: ta-IN-PallaviNeural
+     Gender: Female
+
+     Name: ta-IN-ValluvarNeural
+     Gender: Male
+
+     Name: ta-LK-KumarNeural
+     Gender: Male
+
+     Name: ta-LK-SaranyaNeural
+     Gender: Female
+
+     Name: ta-MY-KaniNeural
+     Gender: Female
+
+     Name: ta-MY-SuryaNeural
+     Gender: Male
+
+     Name: ta-SG-AnbuNeural
+     Gender: Male
+
+     Name: ta-SG-VenbaNeural
+     Gender: Female
+
+     Name: te-IN-MohanNeural
+     Gender: Male
+
+     Name: te-IN-ShrutiNeural
+     Gender: Female
+
+     Name: th-TH-NiwatNeural
+     Gender: Male
+
+     Name: th-TH-PremwadeeNeural
+     Gender: Female
+
+     Name: tr-TR-AhmetNeural
+     Gender: Male
+
+     Name: tr-TR-EmelNeural
+     Gender: Female
+
+     Name: uk-UA-OstapNeural
+     Gender: Male
+
+     Name: uk-UA-PolinaNeural
+     Gender: Female
+
+     Name: ur-IN-GulNeural
+     Gender: Female
+
+     Name: ur-IN-SalmanNeural
+     Gender: Male
+
+     Name: ur-PK-AsadNeural
+     Gender: Male
+
+     Name: ur-PK-UzmaNeural
+     Gender: Female
+
+     Name: uz-UZ-MadinaNeural
+     Gender: Female
+
+     Name: uz-UZ-SardorNeural
+     Gender: Male
+
+     Name: vi-VN-HoaiMyNeural
+     Gender: Female
+
+     Name: vi-VN-NamMinhNeural
+     Gender: Male
+
+     Name: zh-CN-XiaoxiaoNeural
+     Gender: Female
+
+     Name: zh-CN-XiaoyiNeural
+     Gender: Female
+
+     Name: zh-CN-YunjianNeural
+     Gender: Male
+
+     Name: zh-CN-YunxiNeural
+     Gender: Male
+
+     Name: zh-CN-YunxiaNeural
+     Gender: Male
+
+     Name: zh-CN-YunyangNeural
+     Gender: Male
+
+     Name: zh-CN-liaoning-XiaobeiNeural
+     Gender: Female
+
+     Name: zh-CN-shaanxi-XiaoniNeural
+     Gender: Female
+
+     Name: zh-HK-HiuGaaiNeural
+     Gender: Female
+
+     Name: zh-HK-HiuMaanNeural
+     Gender: Female
+
+     Name: zh-HK-WanLungNeural
+     Gender: Male
+
+     Name: zh-TW-HsiaoChenNeural
+     Gender: Female
+
+     Name: zh-TW-HsiaoYuNeural
+     Gender: Female
+
+     Name: zh-TW-YunJheNeural
+     Gender: Male
+
+     Name: zu-ZA-ThandoNeural
+     Gender: Female
+
+     Name: zu-ZA-ThembaNeural
+     Gender: Male
+
+
+     Name: en-US-AvaMultilingualNeural-V2
+     Gender: Female
+
+     Name: en-US-AndrewMultilingualNeural-V2
+     Gender: Male
+
+     Name: en-US-EmmaMultilingualNeural-V2
+     Gender: Female
+
+     Name: en-US-BrianMultilingualNeural-V2
+     Gender: Male
+
+     Name: de-DE-FlorianMultilingualNeural-V2
+     Gender: Male
+
+     Name: de-DE-SeraphinaMultilingualNeural-V2
+     Gender: Female
+
+     Name: fr-FR-RemyMultilingualNeural-V2
+     Gender: Male
+
+     Name: fr-FR-VivienneMultilingualNeural-V2
+     Gender: Female
+
+     Name: zh-CN-XiaoxiaoMultilingualNeural-V2
+     Gender: Female
+     """.strip()
+     voices = []
+     # Regular expression matching consecutive "Name:" and "Gender:" lines
+     pattern = re.compile(r"Name:\s*(.+)\s*Gender:\s*(.+)\s*", re.MULTILINE)
+     # Find all matches
+     matches = pattern.findall(azure_voices_str)
+
+     for name, gender in matches:
+         # Apply the locale filter, if any
+         if filter_locals and any(
+             name.lower().startswith(fl.lower()) for fl in filter_locals
+         ):
+             voices.append(f"{name}-{gender}")
+         elif not filter_locals:
+             voices.append(f"{name}-{gender}")
+
+     voices.sort()
+     return voices
+
+
+ def parse_voice_name(name: str):
+     # zh-CN-XiaoyiNeural-Female
+     # zh-CN-YunxiNeural-Male
+     # zh-CN-XiaoxiaoMultilingualNeural-V2-Female
+     name = name.replace("-Female", "").replace("-Male", "").strip()
+     return name
+
+
+ def is_azure_v2_voice(voice_name: str):
+     voice_name = parse_voice_name(voice_name)
+     if voice_name.endswith("-V2"):
+         return voice_name.replace("-V2", "").strip()
+     return ""
+
+
+ def is_siliconflow_voice(voice_name: str):
+     """Check whether the voice name refers to a SiliconFlow voice."""
+     return voice_name.startswith("siliconflow:")
+
+
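+ # Quick reference (illustrative examples) for the three helpers above:
+ #   parse_voice_name("zh-CN-XiaoyiNeural-Female")               -> "zh-CN-XiaoyiNeural"
+ #   is_azure_v2_voice("en-US-AvaMultilingualNeural-V2-Female")  -> "en-US-AvaMultilingualNeural"
+ #   is_azure_v2_voice("en-US-AvaNeural-Female")                 -> ""  (falsy: not a V2 voice)
+ #   is_siliconflow_voice("siliconflow:FunAudioLLM/CosyVoice2-0.5B:alex-Male") -> True
+
+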
+ def tts(
+     text: str,
+     voice_name: str,
+     voice_rate: float,
+     voice_file: str,
+     voice_volume: float = 1.0,
+ ) -> Union[SubMaker, None]:
+     if is_azure_v2_voice(voice_name):
+         return azure_tts_v2(text, voice_name, voice_file)
+     elif is_siliconflow_voice(voice_name):
+         # Extract the model and voice from voice_name.
+         # Format: siliconflow:model:voice-Gender
+         parts = voice_name.split(":")
+         if len(parts) >= 3:
+             model = parts[1]
+             # Remove the gender suffix, e.g. "alex-Male" -> "alex"
+             voice_with_gender = parts[2]
+             voice = voice_with_gender.split("-")[0]
+             # Build the full voice parameter in "model:voice" format
+             full_voice = f"{model}:{voice}"
+             return siliconflow_tts(
+                 text, model, full_voice, voice_rate, voice_file, voice_volume
+             )
+         else:
+             logger.error(f"Invalid siliconflow voice name format: {voice_name}")
+             return None
+     return azure_tts_v1(text, voice_name, voice_rate, voice_file)
+
+
+ def convert_rate_to_percent(rate: float) -> str:
+     if rate == 1.0:
+         return "+0%"
+     percent = round((rate - 1.0) * 100)
+     if percent > 0:
+         return f"+{percent}%"
+     else:
+         return f"{percent}%"
+
+
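+ # Examples (illustrative): convert_rate_to_percent maps a speed multiplier
+ # to the percent string edge-tts expects:
+ #   1.0 -> "+0%"    1.2 -> "+20%"    0.8 -> "-20%"    1.25 -> "+25%"
+
+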
+ def azure_tts_v1(
+     text: str, voice_name: str, voice_rate: float, voice_file: str
+ ) -> Union[SubMaker, None]:
+     voice_name = parse_voice_name(voice_name)
+     text = text.strip()
+     rate_str = convert_rate_to_percent(voice_rate)
+     for i in range(3):
+         try:
+             logger.info(f"start, voice name: {voice_name}, try: {i + 1}")
+
+             async def _do() -> SubMaker:
+                 communicate = edge_tts.Communicate(text, voice_name, rate=rate_str)
+                 sub_maker = edge_tts.SubMaker()
+                 with open(voice_file, "wb") as file:
+                     async for chunk in communicate.stream():
+                         if chunk["type"] == "audio":
+                             file.write(chunk["data"])
+                         elif chunk["type"] == "WordBoundary":
+                             sub_maker.create_sub(
+                                 (chunk["offset"], chunk["duration"]), chunk["text"]
+                             )
+                 return sub_maker
+
+             sub_maker = asyncio.run(_do())
+             if not sub_maker or not sub_maker.subs:
+                 logger.warning("failed, sub_maker is None or sub_maker.subs is None")
+                 continue
+
+             logger.info(f"completed, output file: {voice_file}")
+             return sub_maker
+         except Exception as e:
+             logger.error(f"failed, error: {str(e)}")
+     return None
+
+
+ def siliconflow_tts(
+     text: str,
+     model: str,
+     voice: str,
+     voice_rate: float,
+     voice_file: str,
+     voice_volume: float = 1.0,
+ ) -> Union[SubMaker, None]:
+     """
+     Generate speech via the SiliconFlow API.
+
+     Args:
+         text: Text to convert to speech
+         model: Model name, e.g. "FunAudioLLM/CosyVoice2-0.5B"
+         voice: Voice name, e.g. "FunAudioLLM/CosyVoice2-0.5B:alex"
+         voice_rate: Speech speed, in the range [0.25, 4.0]
+         voice_file: Path of the output audio file
+         voice_volume: Speech volume, in the range [0.6, 5.0]; converted to
+             SiliconFlow's gain range of [-10, 10]
+
+     Returns:
+         A SubMaker object, or None on failure
+     """
+     text = text.strip()
+     api_key = config.siliconflow.get("api_key", "")
+
+     if not api_key:
+         logger.error("SiliconFlow API key is not set")
+         return None
+
+     # Convert voice_volume to SiliconFlow's gain range.
+     # The default voice_volume of 1.0 corresponds to a gain of 0.
+     gain = voice_volume - 1.0
+     # Clamp the gain to [-10, 10]
+     gain = max(-10, min(10, gain))
+
+     url = "https://api.siliconflow.cn/v1/audio/speech"
+
+     payload = {
+         "model": model,
+         "input": text,
+         "voice": voice,
+         "response_format": "mp3",
+         "sample_rate": 32000,
+         "stream": False,
+         "speed": voice_rate,
+         "gain": gain,
+     }
+
+     headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}
+
+     for i in range(3):  # up to 3 attempts
+         try:
+             logger.info(
+                 f"start siliconflow tts, model: {model}, voice: {voice}, try: {i + 1}"
+             )
+
+             response = requests.post(url, json=payload, headers=headers)
+
+             if response.status_code == 200:
+                 # Save the audio file
+                 with open(voice_file, "wb") as f:
+                     f.write(response.content)
+
+                 # Create an empty SubMaker object
+                 sub_maker = SubMaker()
+
+                 # Determine the actual duration of the audio file
+                 try:
+                     # Try to get the audio duration with moviepy
+                     from moviepy import AudioFileClip
+
+                     audio_clip = AudioFileClip(voice_file)
+                     audio_duration = audio_clip.duration
+                     audio_clip.close()
+
+                     # Convert the duration to 100 ns units (edge_tts compatible)
+                     audio_duration_100ns = int(audio_duration * 10000000)
+
+                     # Split the text at punctuation marks to create more
+                     # accurate, sentence-level subtitles
+                     sentences = utils.split_string_by_punctuations(text)
+
+                     if sentences:
+                         # Estimate each sentence's duration, proportional to
+                         # its character count
+                         total_chars = sum(len(s) for s in sentences)
+                         char_duration = (
+                             audio_duration_100ns / total_chars if total_chars > 0 else 0
+                         )
+
+                         current_offset = 0
+                         for sentence in sentences:
+                             if not sentence.strip():
+                                 continue
+
+                             # Duration of the current sentence
+                             sentence_chars = len(sentence)
+                             sentence_duration = int(sentence_chars * char_duration)
+
+                             # Append to the SubMaker
+                             sub_maker.subs.append(sentence)
+                             sub_maker.offset.append(
+                                 (current_offset, current_offset + sentence_duration)
+                             )
+
+                             # Advance the offset
+                             current_offset += sentence_duration
+                     else:
+                         # If the text cannot be split, use it as a single subtitle
+                         sub_maker.subs = [text]
+                         sub_maker.offset = [(0, audio_duration_100ns)]
+
+                 except Exception as e:
+                     logger.warning(f"Failed to create accurate subtitles: {str(e)}")
+                     # Fall back to a single subtitle covering the whole clip
+                     sub_maker.subs = [text]
+                     # Use the actual audio duration if available; otherwise assume 10 s
+                     sub_maker.offset = [
+                         (
+                             0,
+                             audio_duration_100ns
+                             if "audio_duration_100ns" in locals()
+                             else 100000000,
+                         )
+                     ]
+
+                 logger.success(f"siliconflow tts succeeded: {voice_file}")
+                 return sub_maker
+             else:
+                 logger.error(
+                     f"siliconflow tts failed with status code {response.status_code}: {response.text}"
+                 )
+         except Exception as e:
+             logger.error(f"siliconflow tts failed: {str(e)}")
+
+     return None
+
+
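+ # Worked example (illustrative) of the character-proportional timing above:
+ # a 10 s clip is 100_000_000 units of 100 ns; with two sentences of 30 and
+ # 10 characters, char_duration = 100_000_000 / 40 = 2_500_000, so the
+ # subtitles get the spans (0, 75_000_000) and (75_000_000, 100_000_000),
+ # i.e. 7.5 s and 2.5 s.
+
+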
+ def azure_tts_v2(text: str, voice_name: str, voice_file: str) -> Union[SubMaker, None]:
+     parsed_voice_name = is_azure_v2_voice(voice_name)
+     if not parsed_voice_name:
+         logger.error(f"invalid voice name: {voice_name}")
+         raise ValueError(f"invalid voice name: {voice_name}")
+     voice_name = parsed_voice_name
+     text = text.strip()
+
+     def _format_duration_to_offset(duration) -> int:
+         if isinstance(duration, str):
+             time_obj = datetime.strptime(duration, "%H:%M:%S.%f")
+             milliseconds = (
+                 (time_obj.hour * 3600000)
+                 + (time_obj.minute * 60000)
+                 + (time_obj.second * 1000)
+                 + (time_obj.microsecond // 1000)
+             )
+             return milliseconds * 10000
+
+         if isinstance(duration, int):
+             return duration
+
+         return 0
+
+     for i in range(3):
+         try:
+             logger.info(f"start, voice name: {voice_name}, try: {i + 1}")
+
+             import azure.cognitiveservices.speech as speechsdk
+
+             sub_maker = SubMaker()
+
+             def speech_synthesizer_word_boundary_cb(evt: speechsdk.SessionEventArgs):
+                 # Record each word boundary as a (start, end) offset pair in
+                 # 100 ns units, matching the edge-tts SubMaker convention
+                 duration = _format_duration_to_offset(str(evt.duration))
+                 offset = _format_duration_to_offset(evt.audio_offset)
+                 sub_maker.subs.append(evt.text)
+                 sub_maker.offset.append((offset, offset + duration))
+
+             # Create a speech config with the configured subscription key and region
+             speech_key = config.azure.get("speech_key", "")
+             service_region = config.azure.get("speech_region", "")
+             if not speech_key or not service_region:
+                 logger.error("Azure speech key or region is not set")
+                 return None
+
+             audio_config = speechsdk.audio.AudioOutputConfig(
+                 filename=voice_file, use_default_speaker=True
+             )
+             speech_config = speechsdk.SpeechConfig(
+                 subscription=speech_key, region=service_region
+             )
+             speech_config.speech_synthesis_voice_name = voice_name
+             # speech_config.set_property(
+             #     property_id=speechsdk.PropertyId.SpeechServiceResponse_RequestSentenceBoundary,
+             #     value="true",
+             # )
+             speech_config.set_property(
+                 property_id=speechsdk.PropertyId.SpeechServiceResponse_RequestWordBoundary,
+                 value="true",
+             )
+
+             speech_config.set_speech_synthesis_output_format(
+                 speechsdk.SpeechSynthesisOutputFormat.Audio48Khz192KBitRateMonoMp3
+             )
+             speech_synthesizer = speechsdk.SpeechSynthesizer(
+                 audio_config=audio_config, speech_config=speech_config
+             )
+             speech_synthesizer.synthesis_word_boundary.connect(
+                 speech_synthesizer_word_boundary_cb
+             )
+
+             result = speech_synthesizer.speak_text_async(text).get()
+             if result.reason == speechsdk.ResultReason.SynthesizingAudioCompleted:
+                 logger.success(f"azure v2 speech synthesis succeeded: {voice_file}")
+                 return sub_maker
+             elif result.reason == speechsdk.ResultReason.Canceled:
+                 cancellation_details = result.cancellation_details
+                 logger.error(
+                     f"azure v2 speech synthesis canceled: {cancellation_details.reason}"
+                 )
+                 if cancellation_details.reason == speechsdk.CancellationReason.Error:
+                     logger.error(
+                         f"azure v2 speech synthesis error: {cancellation_details.error_details}"
+                     )
+         except Exception as e:
+             logger.error(f"failed, error: {str(e)}")
+     return None
+
+
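+ # Example (illustrative): _format_duration_to_offset("0:00:01.500000")
+ # returns 1_500 ms * 10_000 = 15_000_000, matching the 100 ns unit
+ # convention used by edge-tts SubMaker offsets.
+
+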
+ def _format_text(text: str) -> str:
+     # text = text.replace("\n", " ")
+     text = text.replace("[", " ")
+     text = text.replace("]", " ")
+     text = text.replace("(", " ")
+     text = text.replace(")", " ")
+     text = text.replace("{", " ")
+     text = text.replace("}", " ")
+     text = text.strip()
+     return text
+
+
+ def create_subtitle(sub_maker: submaker.SubMaker, text: str, subtitle_file: str):
+     """
+     Optimize the subtitle file:
+     1. Split the script into lines at punctuation marks
+     2. Match each line against the text accumulated from the SubMaker
+     3. Generate a new subtitle file
+     """
+
+     text = _format_text(text)
+
+     def formatter(idx: int, start_time: float, end_time: float, sub_text: str) -> str:
+         """
+         1
+         00:00:00,000 --> 00:00:02,360
+         跑步是一项简单易行的运动
+         """
+         start_t = mktimestamp(start_time).replace(".", ",")
+         end_t = mktimestamp(end_time).replace(".", ",")
+         return f"{idx}\n{start_t} --> {end_t}\n{sub_text}\n"
+
+     start_time = -1.0
+     sub_items = []
+     sub_index = 0
+
+     script_lines = utils.split_string_by_punctuations(text)
+
+     def match_line(_sub_line: str, _sub_index: int):
+         if len(script_lines) <= _sub_index:
+             return ""
+
+         _line = script_lines[_sub_index]
+         if _sub_line == _line:
+             return script_lines[_sub_index].strip()
+
+         _sub_line_ = re.sub(r"[^\w\s]", "", _sub_line)
+         _line_ = re.sub(r"[^\w\s]", "", _line)
+         if _sub_line_ == _line_:
+             return _line_.strip()
+
+         _sub_line_ = re.sub(r"\W+", "", _sub_line)
+         _line_ = re.sub(r"\W+", "", _line)
+         if _sub_line_ == _line_:
+             return _line.strip()
+
+         return ""
+
+     sub_line = ""
+
+     try:
+         for _, (offset, sub) in enumerate(zip(sub_maker.offset, sub_maker.subs)):
+             _start_time, end_time = offset
+             if start_time < 0:
+                 start_time = _start_time
+
+             sub = unescape(sub)
+             sub_line += sub
+             sub_text = match_line(sub_line, sub_index)
+             if sub_text:
+                 sub_index += 1
+                 line = formatter(
+                     idx=sub_index,
+                     start_time=start_time,
+                     end_time=end_time,
+                     sub_text=sub_text,
+                 )
+                 sub_items.append(line)
+                 start_time = -1.0
+                 sub_line = ""
+
+         if len(sub_items) == len(script_lines):
+             with open(subtitle_file, "w", encoding="utf-8") as file:
+                 file.write("\n".join(sub_items) + "\n")
+             try:
+                 sbs = subtitles.file_to_subtitles(subtitle_file, encoding="utf-8")
+                 duration = max([tb for ((ta, tb), txt) in sbs])
+                 logger.info(
+                     f"completed, subtitle file created: {subtitle_file}, duration: {duration}"
+                 )
+             except Exception as e:
+                 logger.error(f"failed, error: {str(e)}")
+                 os.remove(subtitle_file)
+         else:
+             logger.warning(
+                 f"failed, sub_items len: {len(sub_items)}, script_lines len: {len(script_lines)}"
+             )
+
+     except Exception as e:
+         logger.error(f"failed, error: {str(e)}")
+
+
+ def get_audio_duration(sub_maker: submaker.SubMaker):
+     """
+     Get the audio duration in seconds.
+     """
+     if not sub_maker.offset:
+         return 0.0
+     return sub_maker.offset[-1][1] / 10000000
+
+
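+ # Example (illustrative): offsets are stored in 100 ns units, so a final
+ # offset end of 125_000_000 corresponds to 125_000_000 / 10_000_000 = 12.5 s.
+
+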
+ if __name__ == "__main__":
+     voice_name = "zh-CN-XiaoxiaoMultilingualNeural-V2-Female"
+     voice_name = parse_voice_name(voice_name)
+     voice_name = is_azure_v2_voice(voice_name)
+     print(voice_name)
+
+     voices = get_all_azure_voices()
+     print(len(voices))
+
+     async def _do():
+         temp_dir = utils.storage_dir("temp")
+
+         voice_names = [
+             "zh-CN-XiaoxiaoMultilingualNeural",
+             # Female voices
+             "zh-CN-XiaoxiaoNeural",
+             "zh-CN-XiaoyiNeural",
+             # Male voices
+             "zh-CN-YunyangNeural",
+             "zh-CN-YunxiNeural",
+         ]
+         text = """
+         静夜思是唐代诗人李白创作的一首五言古诗。这首诗描绘了诗人在寂静的夜晚,看到窗前的明月,不禁想起远方的家乡和亲人,表达了他对家乡和亲人的深深思念之情。全诗内容是:“床前明月光,疑是地上霜。举头望明月,低头思故乡。”在这短短的四句诗中,诗人通过“明月”和“思故乡”的意象,巧妙地表达了离乡背井人的孤独与哀愁。首句“床前明月光”设景立意,通过明亮的月光引出诗人的遐想;“疑是地上霜”增添了夜晚的寒冷感,加深了诗人的孤寂之情;“举头望明月”和“低头思故乡”则是情感的升华,展现了诗人内心深处的乡愁和对家的渴望。这首诗简洁明快,情感真挚,是中国古典诗歌中非常著名的一首,也深受后人喜爱和推崇。
+         """
+
+         text = """
+         What is the meaning of life? This question has puzzled philosophers, scientists, and thinkers of all kinds for centuries. Throughout history, various cultures and individuals have come up with their interpretations and beliefs around the purpose of life. Some say it's to seek happiness and self-fulfillment, while others believe it's about contributing to the welfare of others and making a positive impact in the world. Despite the myriad of perspectives, one thing remains clear: the meaning of life is a deeply personal concept that varies from one person to another. It's an existential inquiry that encourages us to reflect on our values, desires, and the essence of our existence.
+         """
+
+         text = """
+         预计未来3天深圳冷空气活动频繁,未来两天持续阴天有小雨,出门带好雨具;
+         10-11日持续阴天有小雨,日温差小,气温在13-17℃之间,体感阴凉;
+         12日天气短暂好转,早晚清凉;
+         """
+
+         text = "[Opening scene: A sunny day in a suburban neighborhood. A young boy named Alex, around 8 years old, is playing in his front yard with his loyal dog, Buddy.]\n\n[Camera zooms in on Alex as he throws a ball for Buddy to fetch. Buddy excitedly runs after it and brings it back to Alex.]\n\nAlex: Good boy, Buddy! You're the best dog ever!\n\n[Buddy barks happily and wags his tail.]\n\n[As Alex and Buddy continue playing, a series of potential dangers loom nearby, such as a stray dog approaching, a ball rolling towards the street, and a suspicious-looking stranger walking by.]\n\nAlex: Uh oh, Buddy, look out!\n\n[Buddy senses the danger and immediately springs into action. He barks loudly at the stray dog, scaring it away. Then, he rushes to retrieve the ball before it reaches the street and gently nudges it back towards Alex. Finally, he stands protectively between Alex and the stranger, growling softly to warn them away.]\n\nAlex: Wow, Buddy, you're like my superhero!\n\n[Just as Alex and Buddy are about to head inside, they hear a loud crash from a nearby construction site. They rush over to investigate and find a pile of rubble blocking the path of a kitten trapped underneath.]\n\nAlex: Oh no, Buddy, we have to help!\n\n[Buddy barks in agreement and together they work to carefully move the rubble aside, allowing the kitten to escape unharmed. The kitten gratefully nuzzles against Buddy, who responds with a friendly lick.]\n\nAlex: We did it, Buddy! We saved the day again!\n\n[As Alex and Buddy walk home together, the sun begins to set, casting a warm glow over the neighborhood.]\n\nAlex: Thanks for always being there to watch over me, Buddy. You're not just my dog, you're my best friend.\n\n[Buddy barks happily and nuzzles against Alex as they disappear into the sunset, ready to face whatever adventures tomorrow may bring.]\n\n[End scene.]"
+
+         text = "大家好,我是乔哥,一个想帮你把信用卡全部还清的家伙!\n今天我们要聊的是信用卡的取现功能。\n你是不是也曾经因为一时的资金紧张,而拿着信用卡到ATM机取现?如果是,那你得好好看看这个视频了。\n现在都2024年了,我以为现在不会再有人用信用卡取现功能了。前几天一个粉丝发来一张图片,取现1万。\n信用卡取现有三个弊端。\n一,信用卡取现功能代价可不小。会先收取一个取现手续费,比如这个粉丝,取现1万,按2.5%收取手续费,收取了250元。\n二,信用卡正常消费有最长56天的免息期,但取现不享受免息期。从取现那一天开始,每天按照万5收取利息,这个粉丝用了11天,收取了55元利息。\n三,频繁的取现行为,银行会认为你资金紧张,会被标记为高风险用户,影响你的综合评分和额度。\n那么,如果你资金紧张了,该怎么办呢?\n乔哥给你支一招,用破思机摩擦信用卡,只需要少量的手续费,而且还可以享受最长56天的免息期。\n最后,如果你对玩卡感兴趣,可以找乔哥领取一本《卡神秘籍》,用卡过程中遇到任何疑惑,也欢迎找乔哥交流。\n别忘了,关注乔哥,回复用卡技巧,免费领取《2024用卡技巧》,让我们一起成为用卡高手!"
+
+         text = """
+         2023全年业绩速览
+         公司全年累计实现营业收入1476.94亿元,同比增长19.01%,归母净利润747.34亿元,同比增长19.16%。EPS达到59.49元。第四季度单季,营业收入444.25亿元,同比增长20.26%,环比增长31.86%;归母净利润218.58亿元,同比增长19.33%,环比增长29.37%。这一阶段
+         的业绩表现不仅突显了公司的增长动力和盈利能力,也反映出公司在竞争激烈的市场环境中保持了良好的发展势头。
+         2023年Q4业绩速览
+         第四季度,营业收入贡献主要增长点;销售费用高增致盈利能力承压;税金同比上升27%,扰动净利率表现。
+         业绩解读
+         利润方面,2023全年贵州茅台,归母净利润增速为19%,其中营业收入正贡献18%,营业成本正贡献百分之一,管理费用正贡献百分之一点四。(注:归母净利润增速值=营业收入增速+各科目贡献,展示贡献/拖累的前四名科目,且要求贡献值/净利润增速>15%)
+         """
+         text = "静夜思是唐代诗人李白创作的一首五言古诗。这首诗描绘了诗人在寂静的夜晚,看到窗前的明月,不禁想起远方的家乡和亲人"
+
+         text = _format_text(text)
+         lines = utils.split_string_by_punctuations(text)
+         print(lines)
+
+         for voice_name in voice_names:
+             voice_file = f"{temp_dir}/tts-{voice_name}.mp3"
+             subtitle_file = f"{temp_dir}/tts.mp3.srt"
+             sub_maker = azure_tts_v2(
+                 text=text, voice_name=voice_name, voice_file=voice_file
+             )
+             create_subtitle(sub_maker=sub_maker, text=text, subtitle_file=subtitle_file)
+             audio_duration = get_audio_duration(sub_maker)
+             print(f"voice: {voice_name}, audio duration: {audio_duration}s")
+
+     loop = asyncio.get_event_loop_policy().get_event_loop()
+     try:
+         loop.run_until_complete(_do())
+     finally:
+         loop.close()
app/utils/utils.py ADDED
@@ -0,0 +1,230 @@
+ import json
+ import locale
+ import os
+ from pathlib import Path
+ import threading
+ from typing import Any
+ from uuid import uuid4
+
+ import urllib3
+ from loguru import logger
+
+ from app.models import const
+
+ urllib3.disable_warnings()
+
+
+ def get_response(status: int, data: Any = None, message: str = ""):
+     obj = {
+         "status": status,
+     }
+     if data:
+         obj["data"] = data
+     if message:
+         obj["message"] = message
+     return obj
+
+
+ def to_json(obj):
+     try:
+         # Define a helper function to handle different types of objects
+         def serialize(o):
+             # If the object is a serializable type, return it directly
+             if isinstance(o, (int, float, bool, str)) or o is None:
+                 return o
+             # If the object is binary data, replace it with a placeholder string
+             elif isinstance(o, bytes):
+                 return "*** binary data ***"
+             # If the object is a dictionary, recursively process each key-value pair
+             elif isinstance(o, dict):
+                 return {k: serialize(v) for k, v in o.items()}
+             # If the object is a list or tuple, recursively process each element
+             elif isinstance(o, (list, tuple)):
+                 return [serialize(item) for item in o]
+             # If the object is a custom type, attempt to return its __dict__ attribute
+             elif hasattr(o, "__dict__"):
+                 return serialize(o.__dict__)
+             # Return None for other cases (or choose to raise an exception)
+             else:
+                 return None
+
+         # Use the serialize function to process the input object
+         serialized_obj = serialize(obj)
+
+         # Serialize the processed object into a JSON string
+         return json.dumps(serialized_obj, ensure_ascii=False, indent=4)
+     except Exception:
+         return None
+
+
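+ # Example (illustrative): to_json degrades non-serializable values instead of
+ # raising, e.g. to_json({"id": 1, "blob": b"\x00"}) yields
+ # '{\n    "id": 1,\n    "blob": "*** binary data ***"\n}'.
+
+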
+ def get_uuid(remove_hyphen: bool = False):
+     u = str(uuid4())
+     if remove_hyphen:
+         u = u.replace("-", "")
+     return u
+
+
+ def root_dir():
+     return os.path.dirname(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))
+
+
+ def storage_dir(sub_dir: str = "", create: bool = False):
+     d = os.path.join(root_dir(), "storage")
+     if sub_dir:
+         d = os.path.join(d, sub_dir)
+     if create and not os.path.exists(d):
+         os.makedirs(d)
+
+     return d
+
+
+ def resource_dir(sub_dir: str = ""):
+     d = os.path.join(root_dir(), "resource")
+     if sub_dir:
+         d = os.path.join(d, sub_dir)
+     return d
+
+
+ def task_dir(sub_dir: str = ""):
+     d = os.path.join(storage_dir(), "tasks")
+     if sub_dir:
+         d = os.path.join(d, sub_dir)
+     if not os.path.exists(d):
+         os.makedirs(d)
+     return d
+
+
+ def font_dir(sub_dir: str = ""):
+     d = resource_dir("fonts")
+     if sub_dir:
+         d = os.path.join(d, sub_dir)
+     if not os.path.exists(d):
+         os.makedirs(d)
+     return d
+
+
+ def song_dir(sub_dir: str = ""):
+     d = resource_dir("songs")
+     if sub_dir:
+         d = os.path.join(d, sub_dir)
+     if not os.path.exists(d):
+         os.makedirs(d)
+     return d
+
+
+ def public_dir(sub_dir: str = ""):
+     d = resource_dir("public")
+     if sub_dir:
+         d = os.path.join(d, sub_dir)
+     if not os.path.exists(d):
+         os.makedirs(d)
+     return d
+
+
+ def run_in_background(func, *args, **kwargs):
+     def run():
+         try:
+             func(*args, **kwargs)
+         except Exception as e:
+             logger.error(f"run_in_background error: {e}")
+
+     thread = threading.Thread(target=run)
+     thread.start()
+     return thread
+
+
+ def time_convert_seconds_to_hmsm(seconds) -> str:
137
+ hours = int(seconds // 3600)
138
+ seconds = seconds % 3600
139
+ minutes = int(seconds // 60)
140
+ milliseconds = int(seconds * 1000) % 1000
141
+ seconds = int(seconds % 60)
142
+ return "{:02d}:{:02d}:{:02d},{:03d}".format(hours, minutes, seconds, milliseconds)
143
+
144
+
145
+ def text_to_srt(idx: int, msg: str, start_time: float, end_time: float) -> str:
146
+ start_time = time_convert_seconds_to_hmsm(start_time)
147
+ end_time = time_convert_seconds_to_hmsm(end_time)
148
+ srt = """%d
149
+ %s --> %s
150
+ %s
151
+ """ % (
152
+ idx,
153
+ start_time,
154
+ end_time,
155
+ msg,
156
+ )
157
+ return srt
158
+
159
+
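+ # Example (illustrative): text_to_srt(1, "hello world", 0.0, 2.36) returns
+ # "1\n00:00:00,000 --> 00:00:02,360\nhello world\n", a single SRT block.
+
+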
+ def str_contains_punctuation(word):
+     for p in const.PUNCTUATIONS:
+         if p in word:
+             return True
+     return False
+
+
+ def split_string_by_punctuations(s):
+     result = []
+     txt = ""
+
+     previous_char = ""
+     next_char = ""
+     for i in range(len(s)):
+         char = s[i]
+         if char == "\n":
+             result.append(txt.strip())
+             txt = ""
+             continue
+
+         if i > 0:
+             previous_char = s[i - 1]
+         if i < len(s) - 1:
+             next_char = s[i + 1]
+
+         if char == "." and previous_char.isdigit() and next_char.isdigit():
+             # In a case like "charged at a 2.5% fee", the dot in "2.5" should
+             # not be treated as a sentence break
+             txt += char
+             continue
+
+         if char not in const.PUNCTUATIONS:
+             txt += char
+         else:
+             result.append(txt.strip())
+             txt = ""
+     result.append(txt.strip())
+     # Filter out empty strings
+     result = list(filter(None, result))
+     return result
+
+
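+ # Example (illustrative, assuming "," and "。" are in const.PUNCTUATIONS):
+ # split_string_by_punctuations("床前明月光,疑是地上霜。") returns
+ # ["床前明月光", "疑是地上霜"]; a dot between digits, as in "2.5", is kept.
+
+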
+ def md5(text):
+     import hashlib
+
+     return hashlib.md5(text.encode("utf-8")).hexdigest()
+
+
+ def get_system_locale():
+     try:
+         loc = locale.getdefaultlocale()
+         # zh_CN, zh_TW return zh
+         # en_US, en_GB return en
+         language_code = loc[0].split("_")[0]
+         return language_code
+     except Exception:
+         return "en"
+
+
+ def load_locales(i18n_dir):
+     _locales = {}
+     for root, dirs, files in os.walk(i18n_dir):
+         for file in files:
+             if file.endswith(".json"):
+                 lang = file.split(".")[0]
+                 with open(os.path.join(root, file), "r", encoding="utf-8") as f:
+                     _locales[lang] = json.loads(f.read())
+     return _locales
+
+
+ def parse_extension(filename):
+     return Path(filename).suffix.lower().lstrip('.')
config.toml ADDED
@@ -0,0 +1,214 @@
+ [app]
+ video_source = "pexels" # "pexels" or "pixabay"
+
+ # Whether to hide the config panel
+ hide_config = false
+
+ # Pexels API Key
+ # Register at https://www.pexels.com/api/ to get your API key.
+ # You can use multiple keys to avoid rate limits.
+ # For example: pexels_api_keys = ["123adsf4567adf89","abd1321cd13efgfdfhi"]
+ # Note the format: wrap each key in double quotes and separate multiple keys with commas.
+ pexels_api_keys = []
+
+ # Pixabay API Key
+ # Register at https://pixabay.com/api/docs/ to get your API key.
+ # You can use multiple keys to avoid rate limits.
+ # For example: pixabay_api_keys = ["123adsf4567adf89","abd1321cd13efgfdfhi"]
+ # Note the format: wrap each key in double quotes and separate multiple keys with commas.
+ pixabay_api_keys = []
+
+ # Supported providers:
+ # openai
+ # moonshot (月之暗面)
+ # azure
+ # qwen (通义千问)
+ # deepseek
+ # gemini
+ # ollama
+ # g4f
+ # oneapi
+ # cloudflare
+ # ernie (文心一言)
+ llm_provider = "cloudflare"
+
+ ########## Pollinations AI Settings
+ # Visit https://pollinations.ai/ to learn more
+ # API Key is optional - leave empty for public access
+ pollinations_api_key = ""
+ # Default base URL for Pollinations API
+ pollinations_base_url = "https://pollinations.ai/api/v1"
+ # Default model for text generation
+ pollinations_model_name = "openai-fast"
+
+ ########## Ollama Settings
+ # No need to set it unless you want to use your own proxy
+ ollama_base_url = ""
+ # Check your available models at https://ollama.com/library
+ ollama_model_name = ""
+
+ ########## OpenAI API Key
+ # Get your API key at https://platform.openai.com/api-keys
+ openai_api_key = ""
+ # No need to set it unless you want to use your own proxy
+ openai_base_url = ""
+ # Check your available models at https://platform.openai.com/account/limits
+ openai_model_name = "gpt-4o-mini"
+
+ ########## Moonshot API Key
+ # Visit https://platform.moonshot.cn/console/api-keys to get your API key.
+ moonshot_api_key = ""
+ moonshot_base_url = "https://api.moonshot.cn/v1"
+ moonshot_model_name = "moonshot-v1-8k"
+
+ ########## OneAPI API Key
+ # Visit https://github.com/songquanpeng/one-api to get your API key
+ oneapi_api_key = ""
+ oneapi_base_url = ""
+ oneapi_model_name = ""
+
+ ########## G4F
+ # Visit https://github.com/xtekky/gpt4free to get more details
+ # Supported model list: https://github.com/xtekky/gpt4free/blob/main/g4f/models.py
+ g4f_model_name = "gpt-3.5-turbo"
+
+ ########## Azure API Key
+ # Visit https://learn.microsoft.com/zh-cn/azure/ai-services/openai/ to get more details
+ # API documentation: https://learn.microsoft.com/zh-cn/azure/ai-services/openai/reference
+ azure_api_key = ""
+ azure_base_url = ""
+ azure_model_name = "gpt-35-turbo" # replace with your model deployment name
+ azure_api_version = "2024-02-15-preview"
+
+ ########## Gemini API Key
+ gemini_api_key = ""
+ gemini_model_name = "gemini-1.0-pro"
+
+ ########## Qwen API Key
+ # Visit https://dashscope.console.aliyun.com/apiKey to get your API key
+ # Visit below links to get more details
+ # https://tongyi.aliyun.com/qianwen/
+ # https://help.aliyun.com/zh/dashscope/developer-reference/model-introduction
+ qwen_api_key = ""
+ qwen_model_name = "qwen-max"
+
+
+ ########## DeepSeek API Key
+ # Visit https://platform.deepseek.com/api_keys to get your API key
+ deepseek_api_key = ""
+ deepseek_base_url = "https://api.deepseek.com"
+ deepseek_model_name = "deepseek-chat"
+
+ # Subtitle Provider, "edge" or "whisper"
+ # If empty, the subtitle will not be generated
+ subtitle_provider = "edge"
+
+ #
+ # ImageMagick
+ #
+ # Once you have installed it, ImageMagick will be automatically detected, except on Windows!
+ # On Windows, for example "C:\Program Files (x86)\ImageMagick-7.1.1-Q16-HDRI\magick.exe"
+ # Download from https://imagemagick.org/archive/binaries/ImageMagick-7.1.1-29-Q16-x64-static.exe
+
+ # imagemagick_path = "C:\\Program Files (x86)\\ImageMagick-7.1.1-Q16\\magick.exe"
+
+
+ #
+ # FFMPEG
+ #
+ # Under normal circumstances, ffmpeg is downloaded automatically and detected automatically.
+ # However, if there is an issue with your environment that prevents automatic downloading, you might encounter the following error:
+ # RuntimeError: No ffmpeg exe could be found.
+ # Install ffmpeg on your system, or set the IMAGEIO_FFMPEG_EXE environment variable.
+ # In such cases, you can manually download ffmpeg and set the ffmpeg_path, download link: https://www.gyan.dev/ffmpeg/builds/
+
+ # ffmpeg_path = "C:\\Users\\harry\\Downloads\\ffmpeg.exe"
+ #########################################################################################
+
+ # When the video is successfully generated, the API service provides a download endpoint for the video, defaulting to the service's current address and listening port.
+ # For example, http://127.0.0.1:8080/tasks/6357f542-a4e1-46a1-b4c9-bf3bd0df5285/final-1.mp4
+ # If you need to provide the service externally using a domain name (usually done with nginx as a proxy), you can set it to your domain name.
+ # For example, https://xxxx.com/tasks/6357f542-a4e1-46a1-b4c9-bf3bd0df5285/final-1.mp4
+ # endpoint="https://xxxx.com"
+ endpoint = ""
+
+
+ # Video material storage location
+ # material_directory = "" # Indicates that video materials will be downloaded to the default folder, the default folder is ./storage/cache_videos under the current project
+ # material_directory = "/user/harry/videos" # Indicates that video materials will be downloaded to a specified folder
+ # material_directory = "task" # Indicates that video materials will be downloaded to the current task's folder, this method does not allow sharing of already downloaded video materials
+
+ material_directory = ""
+
+ # Used for state management of the task
+ enable_redis = false
+ redis_host = "localhost"
+ redis_port = 6379
+ redis_db = 0
+ redis_password = ""
+
+ # Maximum number of concurrent text-to-video tasks
+ max_concurrent_tasks = 5
+
+
+ [whisper]
+ # Only effective when subtitle_provider is "whisper"
+
+ # Run on GPU with FP16
+ # model = WhisperModel(model_size, device="cuda", compute_type="float16")
+
+ # Run on GPU with INT8
+ # model = WhisperModel(model_size, device="cuda", compute_type="int8_float16")
+
+ # Run on CPU with INT8
+ # model = WhisperModel(model_size, device="cpu", compute_type="int8")
+
+ # recommended model_size: "large-v3"
+ model_size = "large-v3"
+ # if you want to use GPU, set device="cuda"
+ device = "CPU"
+ compute_type = "int8"
+
+
+ [proxy]
+ ### Use a proxy to access the Pexels API
+ ### Format: "http://<username>:<password>@<proxy>:<port>"
+ ### Example: "http://user:pass@proxy:1234"
+ ### Doc: https://requests.readthedocs.io/en/latest/user/advanced/#proxies
+
+ # http = "http://10.10.1.10:3128"
+ # https = "http://10.10.1.10:1080"
+
+ [azure]
+ # Azure Speech API Key
+ # Get your API key at https://portal.azure.com/#view/Microsoft_Azure_ProjectOxford/CognitiveServicesHub/~/SpeechServices
+ speech_key = ""
+ speech_region = ""
+
+ [siliconflow]
+ # SiliconFlow API Key
+ # Get your API key at https://siliconflow.cn
+ api_key = ""
+
+ [ui]
+ # UI related settings
+ # Whether to hide logs in the UI
+ hide_log = false
requirements.txt ADDED
@@ -0,0 +1,16 @@
+ moviepy==2.1.2
+ streamlit==1.45.0
+ edge_tts==6.1.19
+ fastapi==0.115.6
+ uvicorn==0.32.1
+ openai==1.56.1
+ faster-whisper==1.1.0
+ loguru==0.7.3
+ google.generativeai==0.8.3
+ dashscope==1.20.14
+ g4f==0.5.2.2
+ azure-cognitiveservices-speech==1.41.1
+ redis==5.2.0
+ python-multipart==0.0.19
+ pyyaml
+ requests>=2.31.0