"""Module for gradio interfaces."""
import os
from pathlib import Path
import gradio as gr
from translator.content import (
    fill_scaffold,
    get_content,
    get_full_prompt,
    llm_translate,
    preprocess_content,
)
from translator.retriever import report, get_github_issue_open_pr, get_github_repo_files
# GitHub PR Agent import
try:
    from pr_generator.agent import GitHubPRAgent

    GITHUB_PR_AVAILABLE = True
except ImportError as e:
    print(f"⚠️ GitHub PR Agent is not available: {e}")
    GITHUB_PR_AVAILABLE = False
import json
from logger.github_logger import GitHubLogger


def report_translation_target_files(
    project: str, translate_lang: str, top_k: int = 1
) -> tuple[str, list[list[str]]]:
    """Return the top-k files that need translation, excluding files already in progress.

    Args:
        project: Project to translate (e.g., "transformers", "smolagents")
        translate_lang: Target language to translate
        top_k: Number of files to return for translation. (Default 1)
    """
    # Get repo files once to avoid duplicate API calls
    all_repo_files = get_github_repo_files(project)
    # Get all available files for translation using the file list
    all_status_report, all_filepath_list = report(
        project, translate_lang, top_k * 2, all_repo_files
    )  # Get more to account for filtering
    # Get files in progress using the same file list
    docs_in_progress, pr_info_list = get_github_issue_open_pr(project, translate_lang, all_repo_files)
    # Filter out files that are already in progress
    available_files = [f for f in all_filepath_list if f not in docs_in_progress]
    # Take only the requested number
    filepath_list = available_files[:top_k]
    # Build combined status report
    status_report = all_status_report
    if docs_in_progress:
        status_report += f"\n\n🔄 Found {len(docs_in_progress)} files in progress for translation:"
        for i, file in enumerate(docs_in_progress):
            status_report += f"\n{i+1}. [`{file}`]({pr_info_list[i]})"
    status_report += f"\n\n📋 Showing {len(filepath_list)} available files (excluding in-progress):"

    return status_report, [[file] for file in filepath_list]
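
# Illustrative usage sketch (not part of the app flow): assumes network access to
# the GitHub API and a valid project name. The returned rows are single-item
# lists, matching the dataframe-style format consumed by translate_docs_interactive.
#
#   status_report, rows = report_translation_target_files("transformers", "ko", top_k=3)
#   print(status_report)   # markdown status summary
#   # rows -> [["docs/source/en/some_doc.md"], ...]  (placeholder path)
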

def translate_docs(
    lang: str,
    file_path: str,
    additional_instruction: str = "",
    project: str = "transformers",
    force_retranslate: bool = False,
) -> tuple[str, str]:
    """Translate documentation."""
    # Check if a translation already exists (unless force retranslate is enabled)
    translation_file_path = (
        Path(__file__).resolve().parent.parent
        / f"translation_result/{file_path}"
    )
    if not force_retranslate and translation_file_path.exists():
        print(f"📄 Found existing translation: {translation_file_path}")
        with open(translation_file_path, "r", encoding="utf-8") as f:
            existing_content = f.read()
        if existing_content.strip():
            existing_msg = f"♻️ **Existing translation loaded** (no tokens used)\n📄 **File:** `{file_path}`\n📂 **Loaded from:** `{translation_file_path}`\n💡 **To retranslate:** Check the 'Force Retranslate' option."
            return existing_msg, existing_content

    # step 1. Get content from file path
    content = get_content(file_path, project)
    to_translate = preprocess_content(content)

    # step 2. Prepare prompt with docs content
    if lang == "ko":
        translation_lang = "Korean"
    to_translate_with_prompt = get_full_prompt(translation_lang, to_translate, additional_instruction)
    print("to_translate_with_prompt:\n", to_translate_with_prompt)

    # step 3. Translate with LLM
    # TODO: hand this part off to the MCP client
    callback_result, translated_content = llm_translate(to_translate_with_prompt)
    print("translated_content:\n")
    print(translated_content)
    if translated_content.startswith("```md\n") and translated_content.endswith("```"):
        print("Detected ```md fence around translated_content; stripping it")
        translated_content = translated_content[5:-3].strip()

    # step 4. Add scaffold to translation result
    translated_doc = fill_scaffold(content, to_translate, translated_content)
    print("translated_doc:\n")
    print(translated_doc)
    return callback_result, translated_doc
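
# Illustrative usage sketch: assumes the LLM backend used by llm_translate is
# configured (e.g. via API-key environment variables); the file path is just an
# example from the transformers docs layout.
#
#   cost_report, translated_doc = translate_docs(
#       "ko", "docs/source/en/accelerator_selection.md", project="transformers"
#   )
#   print(cost_report)      # token/cost summary from the LLM callback
#   print(translated_doc)   # translated markdown with the original scaffold re-applied
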

def translate_docs_interactive(
    translate_lang: str,
    selected_files: list[list[str]],
    additional_instruction: str = "",
    project: str = "transformers",
    force_retranslate: bool = False,
) -> tuple[str, str]:
    """Interactive translation function that processes files one by one.

    Args:
        translate_lang: Target language to translate
        selected_files: List of file paths to translate
    """
    # Extract file paths from the dataframe format
    file_paths = [row[0] for row in selected_files if row and len(row) > 0]
    # Start with the first file
    current_file = file_paths[0]
    callback_result, translated_content = translate_docs(
        translate_lang, current_file, additional_instruction, project, force_retranslate
    )

    # Check if existing translation was loaded
    if isinstance(callback_result, str) and "Existing translation loaded" in callback_result:
        status = callback_result  # Use the existing translation message
    else:
        if force_retranslate:
            status = f"🔄 **Force Retranslation completed**: `{current_file}` → `{translate_lang}`\n\n"
        else:
            status = f"✅ Translation completed: `{current_file}` → `{translate_lang}`\n\n"
        status += f"💰 Used token and cost: \n```\n{callback_result}\n```"

    print(callback_result)
    print(status)
    return status, translated_content
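
# Illustrative usage sketch: the nested-list argument mirrors the row format
# returned by report_translation_target_files above (one file path per row).
#
#   status_md, translated = translate_docs_interactive(
#       "ko", [["docs/source/en/accelerator_selection.md"]]
#   )
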

def generate_github_pr(
    target_language: str,
    filepath: str,
    translated_content: str = None,
    github_config: dict = None,
    en_title: str = None,
    project: str = "transformers",
) -> str:
    """Generate a GitHub PR for translated documentation.

    Args:
        target_language: Target language for translation (e.g., "ko")
        filepath: Original file path (e.g., "docs/source/en/accelerator_selection.md")
        translated_content: Translated content (if None, read from file)
        github_config: GitHub configuration dictionary
        en_title: English title for toctree mapping
        project: Project to translate (e.g., "transformers", "smolagents")

    Returns:
        PR creation result message
    """
    if not GITHUB_PR_AVAILABLE:
        return "❌ GitHub PR Agent is not available. Please install the required libraries."

    if not github_config:
        return "❌ GitHub configuration not provided. Please set up the GitHub token, owner, and repository in the Configuration panel."

    # Validate required configuration
    required_fields = ["token", "owner", "repo_name", "reference_pr_url"]
    missing_fields = [
        field for field in required_fields if not github_config.get(field)
    ]
    if missing_fields:
        return (
            f"❌ Missing required GitHub configuration: {', '.join(missing_fields)}\n\n"
            "💡 Go to the Configuration panel and set:\n"
            + "\n".join([f" • {field}" for field in missing_fields])
        )

    # Set the token in the environment for the agent.
    os.environ["GITHUB_TOKEN"] = github_config["token"]
    try:
        # Read translated content from file if not provided
        if translated_content is None:
            translation_file_path = (
                Path(__file__).resolve().parent.parent
                / f"translation_result/{filepath}"
            )
            if not translation_file_path.exists():
                return f"❌ Translation file not found: {translation_file_path}\n\n💡 Please complete the translation first in Tab 2 for file: {filepath}"
            with open(translation_file_path, "r", encoding="utf-8") as f:
                translated_content = f.read()

        if not translated_content or not translated_content.strip():
            return f"❌ Translated content is empty for file: {filepath}\n\n💡 Please complete the translation first in Tab 2."

        # Execute GitHub PR Agent
        # Get base repository from project config
        from translator.project_config import get_project_config

        project_config = get_project_config(project)
        base_repo_path = project_config.repo_url.replace("https://github.com/", "")
        base_owner, base_repo = base_repo_path.split("/")

        print("🚀 Starting GitHub PR creation...")
        print(f" 📄 File: {filepath}")
        print(f" 🌐 Language: {target_language}")
        print(f" 🔗 Reference PR: {github_config['reference_pr_url']}")
        print(f" 📁 User Fork: {github_config['owner']}/{github_config['repo_name']}")
        print(f" 🎯 Base Repository: {base_owner}/{base_repo}")

        agent = GitHubPRAgent(
            user_owner=github_config["owner"],
            user_repo=github_config["repo_name"],
            base_owner=base_owner,
            base_repo=base_repo,
        )
        result = agent.run_translation_pr_workflow(
            reference_pr_url=github_config["reference_pr_url"],
            target_language=target_language,
            filepath=filepath,
            translated_doc=translated_content,
            base_branch=github_config.get("base_branch", "main"),
        )

        # TEST CODE
        # result = {
        #     'status': 'partial_success',
        #     'branch': 'ko-attention_interface',
        #     'file_path': 'docs/source/ko/attention_interface.md',
        #     'message': 'File was saved and commit was successful.\nPR creation failed: ERROR: Existing PR found: https://github.com/Jwaminju/transformers/pull/1',
        #     'error_details': 'ERROR: Existing PR found: https://github.com/Jwaminju/transformers/pull/1',
        # }

        # Process toctree update after successful translation PR
        toctree_result = None
        if en_title:
            from agent.toctree_handler import TocTreeHandler

            toctree_handler = TocTreeHandler(project)
            toctree_result = toctree_handler.update_toctree_after_translation(
                result, filepath, agent, github_config, project
            )
        # Process result
        # Generate toctree status message (shared for both success and partial_success)
        toctree_status = ""
        if toctree_result:
            if toctree_result["status"] == "success":
                toctree_status = f"\n📑 **Toctree Updated:** ✅ {toctree_result['message']}"
            else:
                toctree_status = f"\n📑 **Toctree Update Failed:** ❌ {toctree_result['message']}"

        # Append full result JSON to dedicated GitHub logging repository (always)
        try:
            log_data = result.copy()
            if toctree_result:
                log_data["toctree_result"] = toctree_result
            log_entry = json.dumps(log_data, ensure_ascii=False) + "\n"
            log_res = GitHubLogger().append_jsonl(log_entry)
            print(f"📝 Log append result: {log_res}")
        except Exception as e:
            print(f"❌ Failed to append PR log via GitHub API: {e}")
if result["status"] == "success":
return f"""β
**GitHub PR Creation Successful!**
π **PR URL:** {result.get('pr_url', 'NO_PR_URL')}
πΏ **Branch:** {result["branch"]}
π **File:** {result["file_path"]}{toctree_status}
{result["message"]}"""
elif result["status"] == "partial_success":
error_details = result.get("error_details", "Unknown error")
# Check if it's "existing PR" case (not really an error)
if "Existing PR found" in error_details:
existing_pr_url = error_details.split(": ")[-1] if ": " in error_details else "Unknown"
return f"""π **Translation Updated Successfully**
π― **Selected Project:** {project}
πΏ **Branch:** {result["branch"]}
π **File:** {result["file_path"]}{toctree_status}
π **Existing PR Updated:** {existing_pr_url}
β
Your translation has been added to the existing PR. The file and toctree have been successfully updated!"""
            else:
                # Actual error case
                return f"""⚠️ **Partial Success**

🎯 **Selected Project:** {project}
📁 **User Fork:** {github_config.get('owner', 'USER')}/{github_config.get('repo_name', 'REPO')}
🎯 **Target Base:** {base_owner}/{base_repo}
🌿 **Branch:** {result["branch"]}
📄 **File:** {result["file_path"]}{toctree_status}

{result["message"]}

**Error Details:**
{error_details}

💡 **Project-Repository Mismatch Check:**
- Selected project '{project}' should match repository '{github_config.get('repo_name', 'REPO')}'
- For smolagents: use Jwaminju/smolagents fork
- For transformers: use Jwaminju/transformers fork"""
        else:
            error_details = result.get("error_details", "No additional details")
            return f"""❌ **GitHub PR Creation Failed**

🎯 **Selected Project:** {project}
📁 **User Fork:** {github_config.get('owner', 'USER')}/{github_config.get('repo_name', 'REPO')}
🎯 **Target Base:** {base_owner}/{base_repo}

**Error Message:**
{result["message"]}

**Error Details:**
{error_details}

💡 **Project-Repository Mismatch:**
Selected project '{project}' but configured repository '{github_config.get('repo_name', 'REPO')}'
• For smolagents project: use 'smolagents' repository
• For transformers project: use 'transformers' repository"""
    except Exception as e:
        error_msg = f"""❌ **Unexpected Error During PR Creation**

**Error:** {str(e)}

**Configuration:**
• Project: {project}
• File: {filepath}
• Target: {github_config.get('owner', 'USER')}/{github_config.get('repo_name', 'REPO')} → {base_owner if 'base_owner' in locals() else 'BASE'}/{base_repo if 'base_repo' in locals() else 'REPO'}"""
        print(error_msg)
        return error_msg
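
# Illustrative usage sketch: every value below is a placeholder. "token" must be a
# real GitHub personal access token, "owner"/"repo_name" point to the user's fork,
# and "reference_pr_url" is an existing PR that the agent presumably uses as a
# reference for its workflow.
#
#   github_config = {
#       "token": "ghp_<YOUR_TOKEN>",
#       "owner": "<your-github-username>",
#       "repo_name": "transformers",
#       "reference_pr_url": "https://github.com/huggingface/transformers/pull/<PR_NUMBER>",
#       "base_branch": "main",
#   }
#   message = generate_github_pr(
#       "ko",
#       "docs/source/en/accelerator_selection.md",
#       github_config=github_config,
#       project="transformers",
#   )
#   print(message)
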
# Backward compatibility function (replaces old mock function)
def mock_generate_PR():
"""Backward compatibility function - returns warning message only"""
return (
"β οΈ mock_generate_PR() is deprecated. Please use generate_github_pr() instead."
)
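

if __name__ == "__main__":
    # Minimal local smoke test (no network access or GitHub token required):
    # only exercises the deprecated compatibility shim. The real entry point is
    # presumably the Gradio app that imports these functions.
    print(mock_generate_PR())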