import html
import logging
import os
import re
import unicodedata
from copy import copy
import deepl
import ollama
import openai
import xinference_client
import requests
from pdf2zh.cache import TranslationCache
from azure.ai.translation.text import TextTranslationClient
from azure.core.credentials import AzureKeyCredential
from tencentcloud.common import credential
from tencentcloud.tmt.v20180321.tmt_client import TmtClient
from tencentcloud.tmt.v20180321.models import TextTranslateRequest
from tencentcloud.tmt.v20180321.models import TextTranslateResponse
import argostranslate.package
import argostranslate.translate
import json
from pdf2zh.config import ConfigManager
def remove_control_characters(s):
    """Return *s* with all Unicode control characters (category "C*") removed."""
    kept = (ch for ch in s if not unicodedata.category(ch).startswith("C"))
    return "".join(kept)
class BaseTranslator:
    """Abstract base for all translation backends.

    Provides language-code normalization, layered environment/config
    resolution (``set_envs``) and translation caching.  Subclasses must
    implement ``do_translate`` and usually override the class attributes
    below.
    """

    name = "base"  # backend identifier used for cache and config lookups
    envs = {}  # backend-specific settings and their defaults
    lang_map = {}  # backend-specific language-code remapping
    CustomPrompt = False  # True if the backend accepts a user prompt template
    ignore_cache = False  # class-wide switch to bypass the translation cache

    def __init__(self, lang_in, lang_out, model):
        # Normalize language codes through the backend-specific map.
        lang_in = self.lang_map.get(lang_in.lower(), lang_in)
        lang_out = self.lang_map.get(lang_out.lower(), lang_out)
        self.lang_in = lang_in
        self.lang_out = lang_out
        self.model = model
        self.cache = TranslationCache(
            self.name,
            {
                "lang_in": lang_in,
                "lang_out": lang_out,
                "model": model,
            },
        )

    def set_envs(self, envs):
        """Resolve backend settings from (in increasing priority) class
        defaults, persisted config, ``os.environ`` and the *envs* argument.

        :param envs: optional dict of explicit overrides; persisted to config
        """
        # Detach from self.__class__.envs
        # Cannot use self.envs = copy(self.__class__.envs)
        # because if set_envs called twice, the second call will override the first call
        self.envs = copy(self.envs)
        if ConfigManager.get_translator_by_name(self.name):
            self.envs = ConfigManager.get_translator_by_name(self.name)
        needUpdate = False
        for key in self.envs:
            if key in os.environ:
                self.envs[key] = os.environ[key]
                needUpdate = True
        if needUpdate:
            # Persist values picked up from the process environment.
            ConfigManager.set_translator_by_name(self.name, self.envs)
        if envs is not None:
            for key in envs:
                self.envs[key] = envs[key]
            ConfigManager.set_translator_by_name(self.name, self.envs)

    def add_cache_impact_parameters(self, k: str, v):
        """
        Add parameters that affect the translation quality to distinguish the translation effects under different parameters.
        :param k: key
        :param v: value
        """
        self.cache.add_params(k, v)

    def translate(self, text, ignore_cache=False):
        """
        Translate the text, and the other part should call this method.
        :param text: text to translate
        :param ignore_cache: skip the cache lookup for this call only
        :return: translated text
        """
        if not (self.ignore_cache or ignore_cache):
            cache = self.cache.get(text)
            if cache is not None:
                return cache
        translation = self.do_translate(text)
        self.cache.set(text, translation)
        return translation

    def do_translate(self, text):
        """
        Actual translate text, override this method
        :param text: text to translate
        :return: translated text
        """
        raise NotImplementedError

    def prompt(self, text, prompt):
        """Build the chat message list used by LLM backends.

        :param text: source text to translate
        :param prompt: optional user Template whose substituted value is
            evaluated as a Python expression producing a message list
        """
        if prompt:
            context = {
                "lang_in": self.lang_in,
                "lang_out": self.lang_out,
                "text": text,
            }
            # SECURITY NOTE: the substituted template is eval'd as Python, so
            # custom prompt templates must come from a trusted source
            # (CLI/config), never from untrusted input.
            return eval(prompt.safe_substitute(context))
        else:
            return [
                {
                    "role": "system",
                    "content": "You are a professional,authentic machine translation engine. Only Output the translated text, do not include any other text.",
                },
                {
                    "role": "user",
                    "content": f"Translate the following markdown source text to {self.lang_out}. Keep the formula notation {{v*}} unchanged. Output translation directly without any additional text.\nSource Text: {text}\nTranslated Text:",  # noqa: E501
                },
            ]

    def __str__(self):
        return f"{self.name} {self.lang_in} {self.lang_out} {self.model}"

    def get_rich_text_left_placeholder(self, id: int):
        # No rich-text markup by default (was a pointless f-string literal
        # f""); LLM backends override these with real placeholders.
        return ""

    def get_rich_text_right_placeholder(self, id: int):
        return ""

    def get_formular_placeholder(self, id: int):
        return self.get_rich_text_left_placeholder(
            id
        ) + self.get_rich_text_right_placeholder(id)
class GoogleTranslator(BaseTranslator):
    """Free Google web translator (scrapes the mobile HTML endpoint)."""

    name = "google"
    lang_map = {"zh": "zh-CN"}

    def __init__(self, lang_in, lang_out, model, **kwargs):
        super().__init__(lang_in, lang_out, model)
        self.session = requests.Session()
        self.endpoint = "http://translate.google.com/m"
        # An old IE user agent keeps Google serving the simple HTML page
        # that the regex below parses.
        self.headers = {
            "User-Agent": "Mozilla/4.0 (compatible;MSIE 6.0;Windows NT 5.1;SV1;.NET CLR 1.1.4322;.NET CLR 2.0.50727;.NET CLR 3.0.04506.30)"  # noqa: E501
        }

    def do_translate(self, text):
        text = text[:5000]  # google translate max length
        response = self.session.get(
            self.endpoint,
            params={"tl": self.lang_out, "sl": self.lang_in, "q": text},
            headers=self.headers,
        )
        re_result = re.findall(
            r'(?s)class="(?:t0|result-container)">(.*?)<', response.text
        )
        if response.status_code == 400:
            result = "IRREPARABLE TRANSLATION ERROR"
        else:
            response.raise_for_status()
            # Guard: previously an empty scrape raised a bare IndexError on
            # re_result[0]; fail with a meaningful error instead.
            if not re_result:
                raise ValueError("Empty translation result from Google")
            result = html.unescape(re_result[0])
        return remove_control_characters(result)
class BingTranslator(BaseTranslator):
    """Free Bing web translator.

    Scrapes the public bing.com/translator page for per-session parameters
    (IG, IID and the abuse-prevention key/token) and then posts to the
    internal ``ttranslatev3`` endpoint — no API key required.
    """

    # Reference implementation:
    # https://github.com/immersive-translate/old-immersive-translate/blob/6df13da22664bea2f51efe5db64c63aca59c4e79/src/background/translationService.js
    name = "bing"
    lang_map = {"zh": "zh-Hans"}

    def __init__(self, lang_in, lang_out, model, **kwargs):
        super().__init__(lang_in, lang_out, model)
        self.session = requests.Session()
        self.endpoint = "https://www.bing.com/translator"
        self.headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36 Edg/131.0.0.0",  # noqa: E501
        }

    def find_sid(self):
        """Fetch the translator page and extract the session parameters.

        :return: tuple of (base url, ig, iid, abuse-prevention key, token)
        :raises HTTPError: if the page request fails
        :raises IndexError: if the page layout changed and a regex finds nothing
        """
        response = self.session.get(self.endpoint)
        response.raise_for_status()
        # Strip the trailing "translator" path segment (10 chars) so the
        # remainder can be used as the API base URL.
        url = response.url[:-10]
        ig = re.findall(r"\"ig\":\"(.*?)\"", response.text)[0]
        iid = re.findall(r"data-iid=\"(.*?)\"", response.text)[-1]
        key, token = re.findall(
            r"params_AbusePreventionHelper\s=\s\[(.*?),\"(.*?)\",", response.text
        )[0]
        return url, ig, iid, key, token

    def do_translate(self, text):
        # A fresh session id is scraped for every request; tokens expire.
        text = text[:1000]  # bing translate max length
        url, ig, iid, key, token = self.find_sid()
        response = self.session.post(
            f"{url}ttranslatev3?IG={ig}&IID={iid}",
            data={
                "fromLang": self.lang_in,
                "to": self.lang_out,
                "text": text,
                "token": token,
                "key": key,
            },
            headers=self.headers,
        )
        response.raise_for_status()
        return response.json()[0]["translations"][0]["text"]
class DeepLTranslator(BaseTranslator):
    """Official DeepL API backend (https://github.com/DeepLcom/deepl-python)."""

    name = "deepl"
    envs = {"DEEPL_AUTH_KEY": None}
    lang_map = {"zh": "zh-Hans"}

    def __init__(self, lang_in, lang_out, model, envs=None, **kwargs):
        self.set_envs(envs)
        super().__init__(lang_in, lang_out, model)
        self.client = deepl.Translator(self.envs["DEEPL_AUTH_KEY"])

    def do_translate(self, text):
        result = self.client.translate_text(
            text, source_lang=self.lang_in, target_lang=self.lang_out
        )
        return result.text
class DeepLXTranslator(BaseTranslator):
    """DeepLX-compatible HTTP endpoint.

    See https://deeplx.owo.network/endpoints/free.html for the protocol.
    """

    name = "deeplx"
    envs = {
        "DEEPLX_ENDPOINT": "https://api.deepl.com/translate",
        "DEEPLX_ACCESS_TOKEN": None,
    }
    lang_map = {"zh": "zh-Hans"}

    def __init__(self, lang_in, lang_out, model, envs=None, **kwargs):
        self.set_envs(envs)
        super().__init__(lang_in, lang_out, model)
        self.session = requests.Session()
        endpoint = self.envs["DEEPLX_ENDPOINT"]
        token = self.envs["DEEPLX_ACCESS_TOKEN"]
        # Append the access token as a query parameter when one is configured.
        self.endpoint = f"{endpoint}?token={token}" if token else endpoint

    def do_translate(self, text):
        payload = {
            "source_lang": self.lang_in,
            "target_lang": self.lang_out,
            "text": text,
        }
        response = self.session.post(self.endpoint, json=payload)
        response.raise_for_status()
        return response.json()["data"]
class OllamaTranslator(BaseTranslator):
    """Local Ollama server backend (https://github.com/ollama/ollama-python)."""

    name = "ollama"
    envs = {
        "OLLAMA_HOST": "http://127.0.0.1:11434",
        "OLLAMA_MODEL": "gemma2",
    }
    CustomPrompt = True

    def __init__(self, lang_in, lang_out, model, envs=None, prompt=None):
        self.set_envs(envs)
        if not model:
            model = self.envs["OLLAMA_MODEL"]
        super().__init__(lang_in, lang_out, model)
        # Greedy decoding: random sampling could break formula placeholders.
        self.options = {"temperature": 0}
        self.client = ollama.Client(host=self.envs["OLLAMA_HOST"])
        self.prompttext = prompt
        self.add_cache_impact_parameters("temperature", self.options["temperature"])

    def do_translate(self, text):
        """Try each ';'-separated model in turn; return the first success.

        :raises Exception: if every configured model fails
        """
        # Sanity bound: a translation much longer than the input indicates a
        # runaway generation.
        maxlen = max(2000, len(text) * 5)
        for model in self.model.split(";"):
            try:
                response = ""
                stream = self.client.chat(
                    model=model,
                    options=self.options,
                    messages=self.prompt(text, self.prompttext),
                    stream=True,
                )
                in_think_block = False
                # Only deepseek-r1 emits <think>...</think> reasoning blocks
                # that must be stripped from the output.
                # BUG FIX: the tag markers below were empty strings, which
                # made `"" in chunk` always true and `chunk.split("")` raise
                # ValueError on every deepseek-r1 chunk.
                is_deepseek_r1 = "deepseek-r1" in model
                for chunk in stream:
                    chunk = chunk["message"]["content"]
                    if is_deepseek_r1:
                        if "<think>" in chunk:
                            in_think_block = True
                            chunk = chunk.split("<think>")[0]
                        if "</think>" in chunk:
                            in_think_block = False
                            chunk = chunk.split("</think>")[1]
                        if not in_think_block:
                            response += chunk
                    else:
                        response += chunk
                    if len(response) > maxlen:
                        raise Exception("Response too long")
                return response.strip()
            except Exception as e:
                print(e)
        raise Exception("All models failed")
class XinferenceTranslator(BaseTranslator):
    """Xinference local inference backend (https://github.com/xorbitsai/inference)."""

    name = "xinference"
    envs = {
        "XINFERENCE_HOST": "http://127.0.0.1:9997",
        "XINFERENCE_MODEL": "gemma-2-it",
    }
    CustomPrompt = True

    def __init__(self, lang_in, lang_out, model, envs=None, prompt=None):
        self.set_envs(envs)
        if not model:
            model = self.envs["XINFERENCE_MODEL"]
        super().__init__(lang_in, lang_out, model)
        # Greedy decoding: random sampling could break formula placeholders.
        self.options = {"temperature": 0}
        self.client = xinference_client.RESTfulClient(self.envs["XINFERENCE_HOST"])
        self.prompttext = prompt
        self.add_cache_impact_parameters("temperature", self.options["temperature"])

    def do_translate(self, text):
        """Try each ';'-separated model in turn; return the first success.

        :raises Exception: if every configured model fails
        """
        maxlen = max(2000, len(text) * 5)
        for model in self.model.split(";"):
            try:
                xf_model = self.client.get_model(model)
                xf_prompt = self.prompt(text, self.prompttext)
                # Xinference chat takes a single user message here; merge the
                # system and user messages into one.
                xf_prompt = [
                    {
                        "role": "user",
                        "content": xf_prompt[0]["content"]
                        + "\n"
                        + xf_prompt[1]["content"],
                    }
                ]
                response = xf_model.chat(
                    generate_config=self.options,
                    messages=xf_prompt,
                )
                content = response["choices"][0]["message"]["content"]
                # BUG FIX: this was `.replace("", "")`, a no-op (the tag text
                # had been lost); strip <think>...</think> reasoning blocks
                # emitted by reasoning models.
                content = re.sub(r"(?s)<think>.*?</think>", "", content)
                if len(content) > maxlen:
                    raise Exception("Response too long")
                return content.strip()
            except Exception as e:
                print(e)
        raise Exception("All models failed")
class OpenAITranslator(BaseTranslator):
    """OpenAI chat-completions backend (https://github.com/openai/openai-python).

    Also serves as the base class for every OpenAI-compatible provider.
    """

    name = "openai"
    envs = {
        "OPENAI_BASE_URL": "https://api.openai.com/v1",
        "OPENAI_API_KEY": None,
        "OPENAI_MODEL": "gpt-4o-mini",
    }
    CustomPrompt = True

    def __init__(
        self,
        lang_in,
        lang_out,
        model,
        base_url=None,
        api_key=None,
        envs=None,
        prompt=None,
    ):
        self.set_envs(envs)
        if not model:
            model = self.envs["OPENAI_MODEL"]
        super().__init__(lang_in, lang_out, model)
        # Greedy decoding: random sampling could break formula placeholders.
        self.options = {"temperature": 0}
        self.client = openai.OpenAI(
            base_url=base_url or self.envs["OPENAI_BASE_URL"],
            api_key=api_key or self.envs["OPENAI_API_KEY"],
        )
        self.prompttext = prompt
        self.add_cache_impact_parameters("temperature", self.options["temperature"])

    def do_translate(self, text) -> str:
        messages = self.prompt(text, self.prompttext)
        completion = self.client.chat.completions.create(
            model=self.model,
            messages=messages,
            **self.options,
        )
        return completion.choices[0].message.content.strip()

    def get_formular_placeholder(self, id: int):
        # LLM-friendly placeholder the prompt tells the model to keep intact.
        return f"{{{{v{id}}}}}"

    def get_rich_text_left_placeholder(self, id: int):
        return self.get_formular_placeholder(id)

    def get_rich_text_right_placeholder(self, id: int):
        return self.get_formular_placeholder(id + 1)
class AzureOpenAITranslator(BaseTranslator):
    """Azure OpenAI deployment backend."""

    name = "azure-openai"
    envs = {
        "AZURE_OPENAI_BASE_URL": None,  # e.g. "https://xxx.openai.azure.com"
        "AZURE_OPENAI_API_KEY": None,
        "AZURE_OPENAI_MODEL": "gpt-4o-mini",
    }
    CustomPrompt = True

    def __init__(
        self,
        lang_in,
        lang_out,
        model,
        base_url=None,
        api_key=None,
        envs=None,
        prompt=None,
    ):
        self.set_envs(envs)
        # BUG FIX: an explicitly passed base_url was unconditionally
        # overwritten, and AZURE_OPENAI_API_KEY was declared but never read;
        # fall back to the configured values only when no argument is given.
        base_url = base_url or self.envs["AZURE_OPENAI_BASE_URL"]
        api_key = api_key or self.envs["AZURE_OPENAI_API_KEY"]
        if not model:
            model = self.envs["AZURE_OPENAI_MODEL"]
        super().__init__(lang_in, lang_out, model)
        # Greedy decoding: random sampling could break formula placeholders.
        self.options = {"temperature": 0}
        self.client = openai.AzureOpenAI(
            azure_endpoint=base_url,
            azure_deployment=model,
            api_version="2024-06-01",
            api_key=api_key,
        )
        self.prompttext = prompt
        self.add_cache_impact_parameters("temperature", self.options["temperature"])

    def do_translate(self, text) -> str:
        """Translate *text* via the configured Azure OpenAI deployment."""
        response = self.client.chat.completions.create(
            model=self.model,
            **self.options,
            messages=self.prompt(text, self.prompttext),
        )
        return response.choices[0].message.content.strip()
class ModelScopeTranslator(OpenAITranslator):
    """ModelScope inference API backend (OpenAI-compatible)."""

    name = "modelscope"
    envs = {
        "MODELSCOPE_BASE_URL": "https://api-inference.modelscope.cn/v1",
        "MODELSCOPE_API_KEY": None,
        "MODELSCOPE_MODEL": "Qwen/Qwen2.5-32B-Instruct",
    }
    CustomPrompt = True

    def __init__(
        self,
        lang_in,
        lang_out,
        model,
        base_url=None,
        api_key=None,
        envs=None,
        prompt=None,
    ):
        self.set_envs(envs)
        # BUG FIX: the base URL was hard-coded, so MODELSCOPE_BASE_URL and the
        # base_url/api_key arguments were silently ignored; honor explicit
        # arguments first, then the configured env values.
        base_url = base_url or self.envs["MODELSCOPE_BASE_URL"]
        api_key = api_key or self.envs["MODELSCOPE_API_KEY"]
        if not model:
            model = self.envs["MODELSCOPE_MODEL"]
        super().__init__(lang_in, lang_out, model, base_url=base_url, api_key=api_key)
        self.prompttext = prompt
class ZhipuTranslator(OpenAITranslator):
    """Zhipu GLM backend via its OpenAI-compatible API.

    https://bigmodel.cn/dev/api/thirdparty-frame/openai-sdk
    """

    name = "zhipu"
    envs = {
        "ZHIPU_API_KEY": None,
        "ZHIPU_MODEL": "glm-4-flash",
    }
    CustomPrompt = True

    def __init__(self, lang_in, lang_out, model, envs=None, prompt=None):
        self.set_envs(envs)
        base_url = "https://open.bigmodel.cn/api/paas/v4"
        api_key = self.envs["ZHIPU_API_KEY"]
        if not model:
            model = self.envs["ZHIPU_MODEL"]
        super().__init__(lang_in, lang_out, model, base_url=base_url, api_key=api_key)
        self.prompttext = prompt

    def do_translate(self, text) -> str:
        try:
            response = self.client.chat.completions.create(
                model=self.model,
                **self.options,
                messages=self.prompt(text, self.prompttext),
            )
        except openai.BadRequestError as e:
            # BUG FIX: the old handler read `response`, which is unbound when
            # create() raised, causing an UnboundLocalError instead of the
            # intended check. Error code 1301 means the text was flagged by
            # Zhipu's content filter and can never be translated, so return a
            # sentinel rather than failing the whole document.
            if "1301" in str(e):
                return "IRREPARABLE TRANSLATION ERROR"
            raise e
        return response.choices[0].message.content.strip()
class SiliconTranslator(OpenAITranslator):
    """SiliconFlow backend (https://docs.siliconflow.cn/quickstart)."""

    name = "silicon"
    envs = {
        "SILICON_API_KEY": None,
        "SILICON_MODEL": "Qwen/Qwen2.5-7B-Instruct",
    }
    CustomPrompt = True

    def __init__(self, lang_in, lang_out, model, envs=None, prompt=None):
        self.set_envs(envs)
        super().__init__(
            lang_in,
            lang_out,
            model or self.envs["SILICON_MODEL"],
            base_url="https://api.siliconflow.cn/v1",
            api_key=self.envs["SILICON_API_KEY"],
        )
        self.prompttext = prompt
class GeminiTranslator(OpenAITranslator):
    """Google Gemini via its OpenAI-compatible endpoint.

    https://ai.google.dev/gemini-api/docs/openai
    """

    name = "gemini"
    envs = {
        "GEMINI_API_KEY": None,
        "GEMINI_MODEL": "gemini-1.5-flash",
    }
    CustomPrompt = True

    def __init__(self, lang_in, lang_out, model, envs=None, prompt=None):
        self.set_envs(envs)
        super().__init__(
            lang_in,
            lang_out,
            model or self.envs["GEMINI_MODEL"],
            base_url="https://generativelanguage.googleapis.com/v1beta/openai/",
            api_key=self.envs["GEMINI_API_KEY"],
        )
        self.prompttext = prompt
class AzureTranslator(BaseTranslator):
    """Azure Text Translation backend (https://github.com/Azure/azure-sdk-for-python)."""

    name = "azure"
    envs = {
        "AZURE_ENDPOINT": "https://api.translator.azure.cn",
        "AZURE_API_KEY": None,
    }
    lang_map = {"zh": "zh-Hans"}

    def __init__(self, lang_in, lang_out, model, envs=None, **kwargs):
        self.set_envs(envs)
        super().__init__(lang_in, lang_out, model)
        key_credential = AzureKeyCredential(self.envs["AZURE_API_KEY"])
        self.client = TextTranslationClient(
            endpoint=self.envs["AZURE_ENDPOINT"],
            credential=key_credential,
            region="chinaeast2",
        )
        # Silence the Azure SDK's per-request HTTP logging.
        # https://github.com/Azure/azure-sdk-for-python/issues/9422
        logging.getLogger(
            "azure.core.pipeline.policies.http_logging_policy"
        ).setLevel(logging.WARNING)

    def do_translate(self, text) -> str:
        result = self.client.translate(
            body=[text],
            from_language=self.lang_in,
            to_language=[self.lang_out],
        )
        return result[0].translations[0].text
class TencentTranslator(BaseTranslator):
    """Tencent Cloud TMT backend (https://github.com/TencentCloud/tencentcloud-sdk-python)."""

    name = "tencent"
    envs = {
        "TENCENTCLOUD_SECRET_ID": None,
        "TENCENTCLOUD_SECRET_KEY": None,
    }

    def __init__(self, lang_in, lang_out, model, envs=None, **kwargs):
        self.set_envs(envs)
        super().__init__(lang_in, lang_out, model)
        # BUG FIX: the declared env entries were never used — credentials
        # supplied via config/the `envs` argument were ignored because
        # DefaultCredentialProvider only reads the process environment.
        # Prefer explicitly configured credentials, fall back to the SDK's
        # default chain (env vars, CVM role, profile, ...).
        secret_id = self.envs["TENCENTCLOUD_SECRET_ID"]
        secret_key = self.envs["TENCENTCLOUD_SECRET_KEY"]
        if secret_id and secret_key:
            cred = credential.Credential(secret_id, secret_key)
        else:
            cred = credential.DefaultCredentialProvider().get_credential()
        self.client = TmtClient(cred, "ap-beijing")
        self.req = TextTranslateRequest()
        self.req.Source = self.lang_in
        self.req.Target = self.lang_out
        self.req.ProjectId = 0

    def do_translate(self, text):
        """Translate *text* by reusing the pre-filled request object."""
        self.req.SourceText = text
        resp: TextTranslateResponse = self.client.TextTranslate(self.req)
        return resp.TargetText
class AnythingLLMTranslator(BaseTranslator):
    """AnythingLLM workspace chat backend."""

    name = "anythingllm"
    envs = {
        "AnythingLLM_URL": None,
        "AnythingLLM_APIKEY": None,
    }
    CustomPrompt = True

    # NOTE(review): the parameter order (lang_out, lang_in) is swapped
    # relative to every other translator; positional callers are unaffected
    # because super() receives them in the same order, but keyword callers
    # would swap languages — confirm before renaming.
    def __init__(self, lang_out, lang_in, model, envs=None, prompt=None):
        self.set_envs(envs)
        super().__init__(lang_out, lang_in, model)
        self.api_url = self.envs["AnythingLLM_URL"]
        self.api_key = self.envs["AnythingLLM_APIKEY"]
        self.headers = {
            "accept": "application/json",
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json",
        }
        self.prompttext = prompt

    def do_translate(self, text):
        messages = self.prompt(text, self.prompttext)
        payload = {
            "message": messages,
            "mode": "chat",
            "sessionId": "translation_expert",
        }
        response = requests.post(
            self.api_url, headers=self.headers, data=json.dumps(payload)
        )
        response.raise_for_status()
        data = response.json()
        if "textResponse" in data:
            return data["textResponse"].strip()
        # BUG FIX: previously fell through and implicitly returned None,
        # which was then stored in the translation cache; fail loudly instead.
        raise ValueError("AnythingLLM response missing 'textResponse'")
class DifyTranslator(BaseTranslator):
    """Dify workflow backend."""

    name = "dify"
    envs = {
        "DIFY_API_URL": None,  # actual Dify API address
        "DIFY_API_KEY": None,  # actual API key
    }

    def __init__(self, lang_out, lang_in, model, envs=None, **kwargs):
        self.set_envs(envs)
        super().__init__(lang_out, lang_in, model)
        self.api_url = self.envs["DIFY_API_URL"]
        self.api_key = self.envs["DIFY_API_KEY"]

    def do_translate(self, text):
        payload = {
            "inputs": {
                "lang_out": self.lang_out,
                "lang_in": self.lang_in,
                "text": text,
            },
            "response_mode": "blocking",
            "user": "translator-service",
        }
        headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json",
        }
        # Send the workflow request to the Dify server.
        response = requests.post(
            self.api_url, headers=headers, data=json.dumps(payload)
        )
        response.raise_for_status()
        body = response.json()
        # Pull the translated text out of the nested workflow output.
        return body.get("data", {}).get("outputs", {}).get("text", [])
class ArgosTranslator(BaseTranslator):
    """Offline translation via Argos Translate.

    Downloads and installs the needed language package on construction.
    """

    name = "argos"

    def __init__(self, lang_in, lang_out, model, **kwargs):
        super().__init__(lang_in, lang_out, model)
        # super().__init__ already applied lang_map normalization, so the old
        # second pass over lang_in/lang_out here was redundant and removed.
        argostranslate.package.update_package_index()
        available_packages = argostranslate.package.get_available_packages()
        matches = [
            pkg
            for pkg in available_packages
            if pkg.from_code == self.lang_in and pkg.to_code == self.lang_out
        ]
        if not matches:
            raise ValueError(
                "lang_in and lang_out pair not supported by Argos Translate."
            )
        download_path = matches[0].download()
        argostranslate.package.install_from_path(download_path)

    def do_translate(self, text):
        """Translate *text* with the installed Argos language pair.

        BUG FIX: this used to override translate() with a (text)-only
        signature, which broke BaseTranslator's translate(text, ignore_cache)
        contract and silently bypassed the translation cache; overriding
        do_translate restores both.
        """
        installed_languages = argostranslate.translate.get_installed_languages()
        from_lang = next(
            lang for lang in installed_languages if lang.code == self.lang_in
        )
        to_lang = next(
            lang for lang in installed_languages if lang.code == self.lang_out
        )
        return from_lang.get_translation(to_lang).translate(text)
class GorkTranslator(OpenAITranslator):
    """xAI Grok backend (https://docs.x.ai/docs/overview#getting-started).

    Note: the class/env names keep the historical "Gork" spelling because
    they are part of the public configuration interface.
    """

    name = "grok"
    envs = {
        "GORK_API_KEY": None,
        "GORK_MODEL": "grok-2-1212",
    }
    CustomPrompt = True

    def __init__(self, lang_in, lang_out, model, envs=None, prompt=None):
        self.set_envs(envs)
        super().__init__(
            lang_in,
            lang_out,
            model or self.envs["GORK_MODEL"],
            base_url="https://api.x.ai/v1",
            api_key=self.envs["GORK_API_KEY"],
        )
        self.prompttext = prompt
class GroqTranslator(OpenAITranslator):
    """Groq backend via its OpenAI-compatible API."""

    name = "groq"
    envs = {
        "GROQ_API_KEY": None,
        # BUG FIX: Groq model ids use a dotted version number
        # ("llama-3.3-70b-versatile"); the previous default
        # "llama-3-3-70b-versatile" is not a valid model id.
        "GROQ_MODEL": "llama-3.3-70b-versatile",
    }
    CustomPrompt = True

    def __init__(self, lang_in, lang_out, model, envs=None, prompt=None):
        self.set_envs(envs)
        base_url = "https://api.groq.com/openai/v1"
        api_key = self.envs["GROQ_API_KEY"]
        if not model:
            model = self.envs["GROQ_MODEL"]
        super().__init__(lang_in, lang_out, model, base_url=base_url, api_key=api_key)
        self.prompttext = prompt
class DeepseekTranslator(OpenAITranslator):
    """DeepSeek backend via its OpenAI-compatible API."""

    name = "deepseek"
    envs = {
        "DEEPSEEK_API_KEY": None,
        "DEEPSEEK_MODEL": "deepseek-chat",
    }
    CustomPrompt = True

    def __init__(self, lang_in, lang_out, model, envs=None, prompt=None):
        self.set_envs(envs)
        super().__init__(
            lang_in,
            lang_out,
            model or self.envs["DEEPSEEK_MODEL"],
            base_url="https://api.deepseek.com/v1",
            api_key=self.envs["DEEPSEEK_API_KEY"],
        )
        self.prompttext = prompt
class OpenAIlikedTranslator(OpenAITranslator):
    """Generic OpenAI-compatible endpoint configured entirely via env vars."""

    name = "openailiked"
    envs = {
        "OPENAILIKED_BASE_URL": None,
        "OPENAILIKED_API_KEY": None,
        "OPENAILIKED_MODEL": None,
    }
    CustomPrompt = True

    def __init__(self, lang_in, lang_out, model, envs=None, prompt=None):
        self.set_envs(envs)
        base_url = self.envs["OPENAILIKED_BASE_URL"]
        if not base_url:
            raise ValueError("The OPENAILIKED_BASE_URL is missing.")
        if not model:
            model = self.envs["OPENAILIKED_MODEL"]
            if not model:
                raise ValueError("The OPENAILIKED_MODEL is missing.")
        # Some compatible servers accept any key; substitute a dummy value
        # only when no key was configured at all.
        api_key = self.envs["OPENAILIKED_API_KEY"]
        if api_key is None:
            api_key = "openailiked"
        super().__init__(lang_in, lang_out, model, base_url=base_url, api_key=api_key)
        self.prompttext = prompt