# Spaces: Running
import gradio as gr | |
from transformers import pipeline | |
import tempfile, os, zipfile, traceback | |
# Cache of already-constructed translation pipelines, keyed by
# (source language, target language). Filled on demand by get_translator().
translator_cache = dict()

# One row per directly supported direction: (source, target, model id).
_MODEL_ROWS = (
    ("en", "zh", "Helsinki-NLP/opus-mt-en-zh"),
    ("zh", "en", "Helsinki-NLP/opus-mt-zh-en"),
    ("en", "ja", "Helsinki-NLP/opus-mt-en-ja"),
    ("ja", "en", "Helsinki-NLP/opus-mt-ja-en"),
)

# Lookup table from (source, target) language pair to the model id used for
# that direction. Pairs that are absent here have no direct model.
MODEL_MAP = {(src, tgt): model for src, tgt, model in _MODEL_ROWS}
def get_translator(src_lang, tgt_lang):
    """Return the translation pipeline for a language pair, building it once.

    Args:
        src_lang: Source language code (first element of a MODEL_MAP key).
        tgt_lang: Target language code (second element of a MODEL_MAP key).

    Returns:
        The object produced by ``pipeline("translation", model=...)`` for the
        pair. The same object is returned on every later call for that pair.

    Raises:
        ValueError: If the pair is neither cached nor listed in MODEL_MAP.
    """
    pair = (src_lang, tgt_lang)

    # Fast path: the pipeline for this direction was already constructed.
    if pair in translator_cache:
        return translator_cache[pair]

    if pair not in MODEL_MAP:
        raise ValueError(f"No model for {src_lang} to {tgt_lang}")

    loaded = pipeline("translation", model=MODEL_MAP[pair])
    translator_cache[pair] = loaded
    return loaded
def safe_translate(text, src, tgt):
    """Translate ``text`` from ``src`` to ``tgt`` without ever raising.

    Pairs listed in MODEL_MAP are translated directly. Japanese<->Chinese has
    no direct model and is translated in two hops through English.

    Args:
        text: The text to translate.
        src: Source language code.
        tgt: Target language code.

    Returns:
        The translated text on success;
        ``"[Unsupported: {src}->{tgt}]"`` when the pair is not handled;
        ``"[Translation error: ...]"`` when any step raised.
    """

    def _direct(chunk, a, b):
        # Single-hop translation; lets exceptions propagate to the caller.
        return get_translator(a, b)(chunk, max_length=512)[0]["translation_text"]

    try:
        if (src, tgt) in MODEL_MAP:
            return _direct(text, src, tgt)
        if (src, tgt) in (("ja", "zh"), ("zh", "ja")):
            # Both hops go through the raising helper rather than through
            # safe_translate itself: if the first hop fails, the failure is
            # reported as an error marker instead of the marker string being
            # handed to the second model and "translated" as subtitle text.
            english = _direct(text, src, "en")
            return _direct(english, "en", tgt)
        return f"[Unsupported: {src}->{tgt}]"
    except Exception as e:
        # Deliberate catch-all: one bad subtitle line must not abort a file.
        return f"[Translation error: {str(e)}]"
def parse_srt(srt_text):
    """Split SRT text into ``(index, timestamp, text)`` tuples.

    Cues are runs of non-blank lines separated by one or more blank (empty or
    whitespace-only) lines. Any line ending recognised by ``str.splitlines``
    is accepted, so CRLF input parses the same as LF input. Within a cue the
    first line is the index, the second the timestamp, and the remaining
    lines are joined with single spaces to form the text.

    Args:
        srt_text: Full contents of an SRT file as a string.

    Returns:
        A list of ``(idx, timestamp, text)`` string tuples in file order.
        Cues with fewer than three lines (no text) are dropped.
    """
    subtitles = []
    current = []
    # The trailing "" sentinel flushes the last cue through the same branch
    # that handles blank separator lines.
    for line in srt_text.strip().splitlines() + [""]:
        if line.strip():
            current.append(line)
            continue
        # Blank line: close the cue collected so far. Consecutive blank lines
        # simply hit this branch with an empty ``current`` and add nothing.
        if len(current) >= 3:
            subtitles.append((current[0], current[1], " ".join(current[2:])))
        current = []
    return subtitles
def reassemble_srt(subtitles):
    """Render ``(index, timestamp, text)`` tuples back into SRT text.

    Each tuple becomes three newline-separated fields, and consecutive
    entries are separated by one blank line. No trailing newline is added.
    """
    rendered = []
    for number, timing, body in subtitles:
        rendered.append("{}\n{}\n{}".format(number, timing, body))
    return "\n\n".join(rendered)
def process_file(file_obj, src_lang, tgt_lang, output_dir, error_log):
    """Translate one SRT file and write a bilingual copy into ``output_dir``.

    Every subtitle keeps its index and timestamp; its text becomes the
    original text followed by the translation on the next line. The output
    file has the same base name as the input. This function never raises:
    any failure is appended to ``error_log`` instead.

    Args:
        file_obj: The uploaded file, either as a path (``str`` or
            ``os.PathLike``) or as an object whose ``name`` attribute is the
            path. NOTE(review): which of the two the UI framework delivers
            depends on its version/configuration — confirm against the
            installed Gradio.
        src_lang: Source language code passed to ``safe_translate``.
        tgt_lang: Target language code passed to ``safe_translate``.
        output_dir: Directory the translated file is written to.
        error_log: List that receives one message (with traceback) on failure.
    """
    # Resolve the path BEFORE the try block so the error handler can build its
    # message without touching ``file_obj`` again. Previously the handler read
    # ``file_obj.name`` itself, so an input without that attribute raised a
    # second time inside the handler and aborted the whole batch.
    if isinstance(file_obj, (str, os.PathLike)):
        source_path = os.fspath(file_obj)
    else:
        source_path = getattr(file_obj, "name", None)
    label = source_path if source_path is not None else repr(file_obj)

    try:
        if source_path is None:
            raise TypeError(f"Cannot determine a file path from {file_obj!r}")

        # Undecodable bytes are dropped rather than failing the whole file.
        with open(source_path, "r", encoding="utf-8", errors="ignore") as f:
            raw_text = f.read()

        translated_subs = []
        for idx, ts, txt in parse_srt(raw_text):
            translated = safe_translate(txt, src_lang, tgt_lang)
            translated_subs.append((idx, ts, f"{txt}\n{translated}"))

        output_path = os.path.join(output_dir, os.path.basename(source_path))
        with open(output_path, "w", encoding="utf-8") as f:
            f.write(reassemble_srt(translated_subs))
    except Exception as e:
        # Per-file boundary: record the failure and let the batch continue.
        error_log.append(f"File {label} failed: {str(e)}\n{traceback.format_exc()}")
def batch_translate(files, src_lang, tgt_lang):
    """Translate every uploaded SRT file and return the path of a ZIP archive.

    Each file is handled by ``process_file`` inside a fresh temporary
    directory. The resulting ``.srt`` files are packed into
    ``translated_srt.zip``; if any file failed, a ``log.txt`` with the
    collected error messages is added to the archive.

    Args:
        files: Iterable of uploaded files, or ``None`` when nothing was
            uploaded (treated as an empty batch).
        src_lang: Source language code.
        tgt_lang: Target language code.

    Returns:
        Path to ``translated_srt.zip``, or to ``fail.zip`` (containing only
        ``log.txt``) if building the main archive raised.
    """
    tmp_dir = tempfile.mkdtemp()
    error_log = []

    # ``files`` may be None when the form is submitted without an upload;
    # iterating None would raise before any archive could be produced.
    for file_obj in files or []:
        process_file(file_obj, src_lang, tgt_lang, tmp_dir, error_log)

    zip_path = os.path.join(tmp_dir, "translated_srt.zip")
    log_path = os.path.join(tmp_dir, "log.txt")
    try:
        with zipfile.ZipFile(zip_path, "w") as zipf:
            # Sorted for a deterministic archive order. The archive itself
            # lives in tmp_dir but is excluded by the extension filter.
            for name in sorted(os.listdir(tmp_dir)):
                path = os.path.join(tmp_dir, name)
                # Case-insensitive so outputs named "*.SRT" are not skipped.
                if os.path.isfile(path) and name.lower().endswith(".srt"):
                    zipf.write(path, arcname=name)
            if error_log:
                # Explicit UTF-8: messages embed file names, which may not be
                # representable in the platform's default encoding.
                with open(log_path, "w", encoding="utf-8") as logf:
                    logf.write("\n".join(error_log))
                zipf.write(log_path, arcname="log.txt")
        return zip_path
    except Exception as e:
        # Fall back to an archive that only carries the failure report.
        fail_zip = os.path.join(tmp_dir, "fail.zip")
        with open(log_path, "w", encoding="utf-8") as logf:
            logf.write(f"ZIP error: {str(e)}\n\n{traceback.format_exc()}")
        with zipfile.ZipFile(fail_zip, "w") as zipf:
            zipf.write(log_path, arcname="log.txt")
        return fail_zip
# Build the UI components first (in the same order as they appear in the
# interface), then wire them to batch_translate and start the app.
upload_box = gr.File(file_types=[".srt"], label="Upload SRT files", file_count="multiple")
source_picker = gr.Dropdown(["en", "zh", "ja"], label="Source Language", value="ja")
target_picker = gr.Dropdown(["en", "zh", "ja"], label="Target Language", value="zh")
zip_download = gr.File(label="Download Translated ZIP")

demo = gr.Interface(
    fn=batch_translate,
    inputs=[upload_box, source_picker, target_picker],
    outputs=zip_download,
    title="Batch SRT Translator (EN-ZH-JA)",
    description="Upload .srt subtitle files and translate between English, Chinese, and Japanese. Dual-language output with original + translation. ZIP output. Errors will be logged.",
)
demo.launch()