Spaces:
Sleeping
Sleeping
Merge pull request #4 from Jwaminju/part-of-support-smolagent
Browse files- agent/handler.py +236 -65
- agent/toctree_handler.py +13 -4
- agent/workflow.py +71 -31
- app.py +109 -47
- pr_generator/agent.py +17 -12
- translator/content.py +8 -5
- translator/project_config.py +48 -0
- translator/retriever.py +93 -27
agent/handler.py
CHANGED
|
@@ -13,25 +13,48 @@ from agent.workflow import (
|
|
| 13 |
)
|
| 14 |
from pr_generator.searcher import find_reference_pr_simple_stream
|
| 15 |
from translator.content import get_full_prompt, get_content, preprocess_content
|
|
|
|
| 16 |
|
| 17 |
|
| 18 |
# State management
|
| 19 |
class ChatState:
|
| 20 |
def __init__(self):
|
| 21 |
self.step = "welcome" # welcome -> find_files -> translate -> create_github_pr
|
|
|
|
|
|
|
|
|
|
| 22 |
self.target_language = "ko"
|
| 23 |
self.k_files = 10
|
| 24 |
self.files_to_translate = []
|
| 25 |
self.additional_instruction = ""
|
| 26 |
self.current_file_content = {"translated": ""}
|
| 27 |
-
self.pr_result = None
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
"
|
| 32 |
-
"
|
| 33 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 34 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 35 |
|
| 36 |
|
| 37 |
state = ChatState()
|
|
@@ -53,25 +76,41 @@ def _extract_content_for_display(content: str) -> str:
|
|
| 53 |
|
| 54 |
|
| 55 |
def get_welcome_message():
|
| 56 |
-
"""Initial welcome message with
|
| 57 |
return """**👋 Welcome to 🌐 Hugging Face i18n Translation Agent!**
|
| 58 |
|
| 59 |
I'll help you find files that need translation and translate them in a streamlined workflow.
|
| 60 |
|
| 61 |
-
|
| 62 |
|
| 63 |
-
Use the **`Quick Controls`** on the right or **ask me `what`, `how`, or `help`** to get started.
|
| 64 |
"""
|
| 65 |
|
| 66 |
|
| 67 |
-
def process_file_search_handler(lang: str, k: int, history: list) -> tuple:
|
| 68 |
"""Process file search request and update Gradio UI components."""
|
| 69 |
global state
|
|
|
|
| 70 |
state.target_language = lang
|
| 71 |
state.k_files = k
|
| 72 |
state.step = "find_files"
|
| 73 |
|
| 74 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 75 |
state.files_to_translate = (
|
| 76 |
[file[0] for file in files_list]
|
| 77 |
if files_list
|
|
@@ -87,8 +126,10 @@ def process_file_search_handler(lang: str, k: int, history: list) -> tuple:
|
|
| 87 |
"""
|
| 88 |
|
| 89 |
if state.files_to_translate:
|
|
|
|
| 90 |
for i, file in enumerate(state.files_to_translate, 1):
|
| 91 |
-
|
|
|
|
| 92 |
|
| 93 |
# if len(state.files_to_translate) > 5:
|
| 94 |
# response += f"\n... and {len(state.files_to_translate) - 5} more files"
|
|
@@ -100,14 +141,13 @@ def process_file_search_handler(lang: str, k: int, history: list) -> tuple:
|
|
| 100 |
# Add to history
|
| 101 |
history.append(["Please find files that need translation", response])
|
| 102 |
cleared_input = ""
|
| 103 |
-
selected_tab = 1 if state.files_to_translate else 0
|
| 104 |
|
| 105 |
# 드롭다운 choices로 쓸 파일 리스트 반환 추가
|
| 106 |
return (
|
| 107 |
history,
|
| 108 |
cleared_input,
|
| 109 |
update_status(),
|
| 110 |
-
gr.Tabs(
|
| 111 |
update_dropdown_choices(state.files_to_translate),
|
| 112 |
)
|
| 113 |
|
|
@@ -116,7 +156,30 @@ def update_dropdown_choices(file_list):
|
|
| 116 |
return gr.update(choices=file_list, value=None)
|
| 117 |
|
| 118 |
|
| 119 |
-
def
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 120 |
"""Start the translation process for the first file"""
|
| 121 |
if not state.files_to_translate:
|
| 122 |
return "❌ No files available for translation.", ""
|
|
@@ -125,8 +188,8 @@ def start_translation_process():
|
|
| 125 |
|
| 126 |
# Call translation function (simplified for demo)
|
| 127 |
try:
|
| 128 |
-
translated = translate_docs_interactive(
|
| 129 |
-
state.target_language, [[current_file]], state.additional_instruction
|
| 130 |
)
|
| 131 |
|
| 132 |
state.current_file_content = {"translated": translated}
|
|
@@ -138,19 +201,22 @@ def start_translation_process():
|
|
| 138 |
p.parent.mkdir(parents=True, exist_ok=True)
|
| 139 |
p.write_text(translated, encoding="utf-8")
|
| 140 |
|
| 141 |
-
|
| 142 |
-
|
| 143 |
-
)
|
| 144 |
print("Compeleted translation:\n")
|
| 145 |
print(translated)
|
| 146 |
print("----------------------------")
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
""
|
| 151 |
-
|
| 152 |
-
|
| 153 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 154 |
return response, translated
|
| 155 |
|
| 156 |
|
|
@@ -188,7 +254,12 @@ Currently available actions with quick controls:
|
|
| 188 |
else:
|
| 189 |
return """I understand you want to work on translations!
|
| 190 |
|
| 191 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 192 |
"""
|
| 193 |
|
| 194 |
|
|
@@ -226,12 +297,12 @@ def handle_user_message(message, history):
|
|
| 226 |
|
| 227 |
def update_status():
|
| 228 |
if state.step == "welcome":
|
| 229 |
-
return """
|
| 230 |
<div style="display: grid; grid-template-columns: 1fr 1fr; gap: 10px;padding: 10px; background: rgba(0, 0, 0, 0.25); border-radius: 8px;">
|
| 231 |
<div><strong>🔄 Step:</strong> Welcome</div>
|
|
|
|
| 232 |
<div><strong>📁 Files:</strong> 0</div>
|
| 233 |
-
<div><strong>🌍 Language:</strong>
|
| 234 |
-
<div><strong>⏳ Progress:</strong> Ready</div>
|
| 235 |
</div>
|
| 236 |
"""
|
| 237 |
|
|
@@ -267,6 +338,7 @@ def update_status():
|
|
| 267 |
status_html = f"""
|
| 268 |
<div style="display: grid; grid-template-columns: 1fr 1fr; gap: 10px; padding: 10px; background: rgba(0, 0, 0, 0.25); border-radius: 8px;">
|
| 269 |
<div><strong>🔄 Step:</strong> {step_map.get(state.step, state.step)}</div>
|
|
|
|
| 270 |
<div><strong>📁 Files:</strong> {len(state.files_to_translate)}</div>
|
| 271 |
<div><strong>🌍 Language:</strong> {state.target_language}</div>
|
| 272 |
<div><strong>⏳ Progress:</strong> {progress_map.get(state.step, 'In progress')}</div>
|
|
@@ -284,26 +356,66 @@ def sync_language_displays(lang):
|
|
| 284 |
return lang
|
| 285 |
|
| 286 |
|
| 287 |
-
def
|
| 288 |
-
"""Update
|
| 289 |
global state
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 290 |
|
| 291 |
-
# Set GitHub token in environment variables
|
| 292 |
-
if token:
|
| 293 |
-
os.environ["GITHUB_TOKEN"] = token
|
| 294 |
-
|
| 295 |
-
# Save GitHub configuration to state
|
| 296 |
-
state.github_config.update(
|
| 297 |
-
{
|
| 298 |
-
"token": token,
|
| 299 |
-
"owner": owner,
|
| 300 |
-
"repo_name": repo,
|
| 301 |
-
"reference_pr_url": reference_pr_url
|
| 302 |
-
or state.github_config["reference_pr_url"],
|
| 303 |
-
}
|
| 304 |
-
)
|
| 305 |
|
| 306 |
-
|
|
|
|
|
|
|
| 307 |
|
| 308 |
|
| 309 |
def update_prompt_preview(language, file_path, additional_instruction):
|
|
@@ -319,7 +431,7 @@ def update_prompt_preview(language, file_path, additional_instruction):
|
|
| 319 |
translation_lang = language
|
| 320 |
|
| 321 |
# Get sample content (first 500 characters)
|
| 322 |
-
content = get_content(file_path)
|
| 323 |
to_translate = preprocess_content(content)
|
| 324 |
|
| 325 |
# Truncate for preview
|
|
@@ -330,7 +442,10 @@ def update_prompt_preview(language, file_path, additional_instruction):
|
|
| 330 |
|
| 331 |
return prompt
|
| 332 |
except Exception as e:
|
| 333 |
-
|
|
|
|
|
|
|
|
|
|
| 334 |
|
| 335 |
|
| 336 |
def send_message(message, history):
|
|
@@ -339,14 +454,39 @@ def send_message(message, history):
|
|
| 339 |
|
| 340 |
|
| 341 |
# Button handlers with tab switching
|
| 342 |
-
def start_translate_handler(history,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 343 |
os.environ["ANTHROPIC_API_KEY"] = anthropic_key
|
| 344 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 345 |
state.additional_instruction = additional_instruction
|
| 346 |
state.files_to_translate = [file_to_translate]
|
| 347 |
-
|
| 348 |
-
|
| 349 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 350 |
|
| 351 |
|
| 352 |
def approve_handler(history, owner, repo, reference_pr_url):
|
|
@@ -354,18 +494,34 @@ def approve_handler(history, owner, repo, reference_pr_url):
|
|
| 354 |
global state
|
| 355 |
state.step = "create_github_pr"
|
| 356 |
|
| 357 |
-
#
|
| 358 |
-
state.
|
| 359 |
-
|
| 360 |
-
|
| 361 |
-
|
| 362 |
-
|
| 363 |
-
|
| 364 |
-
|
| 365 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 366 |
history.append(["GitHub PR creation request", response])
|
| 367 |
return history, "", update_status()
|
| 368 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 369 |
# If reference PR is not provided, use the agent to find one
|
| 370 |
if not github_config.get("reference_pr_url"):
|
| 371 |
response = "🤖 **Reference PR URL not found. The agent will now search for a suitable one...**"
|
|
@@ -421,6 +577,7 @@ def approve_handler(history, owner, repo, reference_pr_url):
|
|
| 421 |
translated_content=translated_content,
|
| 422 |
github_config=state.github_config,
|
| 423 |
en_title=file_name,
|
|
|
|
| 424 |
)
|
| 425 |
response += f"\n{pr_response}"
|
| 426 |
else:
|
|
@@ -431,9 +588,23 @@ def approve_handler(history, owner, repo, reference_pr_url):
|
|
| 431 |
|
| 432 |
|
| 433 |
def restart_handler(history):
|
| 434 |
-
"""Resets the state
|
| 435 |
global state
|
|
|
|
|
|
|
|
|
|
|
|
|
| 436 |
state = ChatState()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 437 |
welcome_msg = get_welcome_message()
|
| 438 |
new_hist = [[None, welcome_msg]]
|
| 439 |
return new_hist, "", update_status(), gr.Tabs(selected=0)
|
|
|
|
| 13 |
)
|
| 14 |
from pr_generator.searcher import find_reference_pr_simple_stream
|
| 15 |
from translator.content import get_full_prompt, get_content, preprocess_content
|
| 16 |
+
from translator.project_config import get_available_projects, get_project_config
|
| 17 |
|
| 18 |
|
| 19 |
# State management
|
| 20 |
class ChatState:
    """Mutable state shared by the chat handlers.

    The state has two parts:

    * transient workflow attributes (``step``, ``selected_project``,
      ``target_language``, ``k_files``, ``files_to_translate``,
      ``additional_instruction``, ``current_file_content``, ``pr_result``),
      which ``reset_transient_state`` returns to their defaults;
    * ``persistent_settings`` (Anthropic API key and GitHub configuration),
      which ``reset_transient_state`` leaves untouched.
    """

    def __init__(self):
        # Persistent settings (preserved across restarts)
        self.persistent_settings = {
            "anthropic_api_key": "",
            "github_config": {
                "token": "",
                "owner": "",
                "repo_name": "",
                "reference_pr_url": "",
            },
        }

        # Transient state (reset on restart). The defaults live only in
        # reset_transient_state so construction and reset cannot drift apart.
        self.reset_transient_state()

    def reset_transient_state(self):
        """Reset only the workflow state, keep persistent settings."""
        self.step = "welcome"  # welcome -> find_files -> translate -> create_github_pr
        self.selected_project = "transformers"
        self.target_language = "ko"
        self.k_files = 10
        self.files_to_translate = []
        self.additional_instruction = ""
        self.current_file_content = {"translated": ""}
        self.pr_result = None

    @property
    def github_config(self):
        """Return the GitHub settings dict stored in ``persistent_settings``."""
        return self.persistent_settings["github_config"]
|
| 58 |
|
| 59 |
|
| 60 |
state = ChatState()
|
|
|
|
| 76 |
|
| 77 |
|
| 78 |
def get_welcome_message():
    """Return the Markdown greeting shown at the start of a chat.

    The text introduces the agent and asks the user to pick a project first.
    """
    welcome_markdown = """**👋 Welcome to 🌐 Hugging Face i18n Translation Agent!**

I'll help you find files that need translation and translate them in a streamlined workflow.

**🎯 First, select which project you want to translate:**

Use the **`Quick Controls`** on the right to select a project, or **ask me `what`, `how`, or `help`** to get started.
"""
    return welcome_markdown
|
| 88 |
|
| 89 |
|
| 90 |
+
def process_file_search_handler(project: str, lang: str, k: int, history: list) -> tuple:
|
| 91 |
"""Process file search request and update Gradio UI components."""
|
| 92 |
global state
|
| 93 |
+
state.selected_project = project
|
| 94 |
state.target_language = lang
|
| 95 |
state.k_files = k
|
| 96 |
state.step = "find_files"
|
| 97 |
|
| 98 |
+
try:
|
| 99 |
+
status_report, files_list = report_translation_target_files(project, lang, k)
|
| 100 |
+
except Exception as e:
|
| 101 |
+
if "rate limit" in str(e).lower():
|
| 102 |
+
response = f"""❌ **GitHub API Rate Limit Exceeded**
|
| 103 |
+
|
| 104 |
+
{str(e)}
|
| 105 |
+
|
| 106 |
+
**💡 To fix this:**
|
| 107 |
+
1. Set GitHub Token in Configuration panel above
|
| 108 |
+
2. Click "💾 Save Configuration"
|
| 109 |
+
3. Try "Find Files" again"""
|
| 110 |
+
history.append(["File search request", response])
|
| 111 |
+
return history, "", update_status(), gr.Tabs(selected=0), gr.update(choices=[]), gr.update(visible=False)
|
| 112 |
+
else:
|
| 113 |
+
raise # Re-raise non-rate-limit errors
|
| 114 |
state.files_to_translate = (
|
| 115 |
[file[0] for file in files_list]
|
| 116 |
if files_list
|
|
|
|
| 126 |
"""
|
| 127 |
|
| 128 |
if state.files_to_translate:
|
| 129 |
+
config = get_project_config(state.selected_project)
|
| 130 |
for i, file in enumerate(state.files_to_translate, 1):
|
| 131 |
+
file_link = f"{config.repo_url}/blob/main/{file}"
|
| 132 |
+
response += f"\n{i}. [`{file}`]({file_link})"
|
| 133 |
|
| 134 |
# if len(state.files_to_translate) > 5:
|
| 135 |
# response += f"\n... and {len(state.files_to_translate) - 5} more files"
|
|
|
|
| 141 |
# Add to history
|
| 142 |
history.append(["Please find files that need translation", response])
|
| 143 |
cleared_input = ""
|
|
|
|
| 144 |
|
| 145 |
# 드롭다운 choices로 쓸 파일 리스트 반환 추가
|
| 146 |
return (
|
| 147 |
history,
|
| 148 |
cleared_input,
|
| 149 |
update_status(),
|
| 150 |
+
gr.Tabs(), # Don't change tab
|
| 151 |
update_dropdown_choices(state.files_to_translate),
|
| 152 |
)
|
| 153 |
|
|
|
|
| 156 |
return gr.update(choices=file_list, value=None)
|
| 157 |
|
| 158 |
|
| 159 |
+
def confirm_and_go_translate_handler(history):
    """Log the confirmed project/language to the chat and select tab index 1.

    Appends a confirmation entry to ``history`` (mutated in place) and returns
    ``(history, "", update_status(), gr.Tabs(selected=1))``.
    """
    # Only reads the module-level ``state``; nothing is rebound here.
    summary = (
        "✅ **Selection confirmed!**\n\n"
        f"🎯 **Project:** {state.selected_project}\n"
        f"🌍 **Language:** {state.target_language}\n\n"
        "**➡️ Go to Tab 2 to start translation.**"
    )
    history.append(["Confirm selection", summary])
    return history, "", update_status(), gr.Tabs(selected=1)
|
| 166 |
+
|
| 167 |
+
|
| 168 |
+
def confirm_translation_and_go_upload_handler(history):
    """Confirm the current translation and select tab index 2 (upload PR).

    When ``state.current_file_content`` holds no translated text, an error
    entry is appended to ``history`` and the tabs are returned unchanged.
    Otherwise a confirmation naming the first file to translate is appended.

    Returns ``(history, "", update_status(), <gr.Tabs>)``.
    """
    request_label = "Upload PR request"

    if state.current_file_content.get("translated"):
        pending = state.files_to_translate
        file_label = pending[0] if pending else 'Unknown'
        history.append(
            [
                request_label,
                f"✅ **Translation confirmed!**\n\n📄 **File:** `{file_label}`\n\n**➡️ Go to Tab 3 to upload PR.**",
            ]
        )
        return history, "", update_status(), gr.Tabs(selected=2)

    # Nothing translated yet: report it and leave the tab selection alone.
    history.append(
        [request_label, "❌ No translation available. Please complete translation first."]
    )
    return history, "", update_status(), gr.Tabs()
|
| 180 |
+
|
| 181 |
+
|
| 182 |
+
def start_translation_process(force_retranslate=False):
|
| 183 |
"""Start the translation process for the first file"""
|
| 184 |
if not state.files_to_translate:
|
| 185 |
return "❌ No files available for translation.", ""
|
|
|
|
| 188 |
|
| 189 |
# Call translation function (simplified for demo)
|
| 190 |
try:
|
| 191 |
+
status, translated = translate_docs_interactive(
|
| 192 |
+
state.target_language, [[current_file]], state.additional_instruction, state.selected_project, force_retranslate
|
| 193 |
)
|
| 194 |
|
| 195 |
state.current_file_content = {"translated": translated}
|
|
|
|
| 201 |
p.parent.mkdir(parents=True, exist_ok=True)
|
| 202 |
p.write_text(translated, encoding="utf-8")
|
| 203 |
|
| 204 |
+
config = get_project_config(state.selected_project)
|
| 205 |
+
original_file_link = f"{config.repo_url}/blob/main/{current_file}"
|
|
|
|
| 206 |
print("Compeleted translation:\n")
|
| 207 |
print(translated)
|
| 208 |
print("----------------------------")
|
| 209 |
+
|
| 210 |
+
# Different response format for existing vs new translation
|
| 211 |
+
if isinstance(status, str) and "Existing translation loaded" in status:
|
| 212 |
+
response = f"{status}\n**📄 Original Content Link:** {original_file_link}\n\n**🌐 Translated Content:**"
|
| 213 |
+
else:
|
| 214 |
+
response = (
|
| 215 |
+
f"""🔄 Translation for: `{current_file}`\n"""
|
| 216 |
+
f"**📄 Original Content Link:** {original_file_link}\n\n"
|
| 217 |
+
f"{status}\n\n"
|
| 218 |
+
"**🌐 Translated Content:**"
|
| 219 |
+
)
|
| 220 |
return response, translated
|
| 221 |
|
| 222 |
|
|
|
|
| 254 |
else:
|
| 255 |
return """I understand you want to work on translations!
|
| 256 |
|
| 257 |
+
**Two ways to get started:**
|
| 258 |
+
|
| 259 |
+
1. **🔍 Find Files first** - Use Tab 1 to discover files that need translation
|
| 260 |
+
2. **🚀 Direct Translation** - Go to Tab 2 and enter a file path directly (e.g., `docs/source/en/model_doc/bert.md`)
|
| 261 |
+
|
| 262 |
+
Make sure to configure your API keys in the Configuration panel above.
|
| 263 |
"""
|
| 264 |
|
| 265 |
|
|
|
|
| 297 |
|
| 298 |
def update_status():
|
| 299 |
if state.step == "welcome":
|
| 300 |
+
return f"""
|
| 301 |
<div style="display: grid; grid-template-columns: 1fr 1fr; gap: 10px;padding: 10px; background: rgba(0, 0, 0, 0.25); border-radius: 8px;">
|
| 302 |
<div><strong>🔄 Step:</strong> Welcome</div>
|
| 303 |
+
<div><strong>🎯 Project:</strong> {state.selected_project}</div>
|
| 304 |
<div><strong>📁 Files:</strong> 0</div>
|
| 305 |
+
<div><strong>🌍 Language:</strong> {state.target_language}</div>
|
|
|
|
| 306 |
</div>
|
| 307 |
"""
|
| 308 |
|
|
|
|
| 338 |
status_html = f"""
|
| 339 |
<div style="display: grid; grid-template-columns: 1fr 1fr; gap: 10px; padding: 10px; background: rgba(0, 0, 0, 0.25); border-radius: 8px;">
|
| 340 |
<div><strong>🔄 Step:</strong> {step_map.get(state.step, state.step)}</div>
|
| 341 |
+
<div><strong>🎯 Project:</strong> {state.selected_project}</div>
|
| 342 |
<div><strong>📁 Files:</strong> {len(state.files_to_translate)}</div>
|
| 343 |
<div><strong>🌍 Language:</strong> {state.target_language}</div>
|
| 344 |
<div><strong>⏳ Progress:</strong> {progress_map.get(state.step, 'In progress')}</div>
|
|
|
|
| 356 |
return lang
|
| 357 |
|
| 358 |
|
| 359 |
+
def update_project_selection(project, history):
    """Store the chosen project on the shared state and log it to the chat.

    Returns ``(history, "", update_status())`` with ``history`` mutated in place.
    """
    state.selected_project = project
    history.append(
        ["Project selection", f"Selection confirmed: 🎯 Project → **{project}**"]
    )
    return history, "", update_status()
|
| 366 |
+
|
| 367 |
+
|
| 368 |
+
def update_language_selection(lang, history):
    """Store the chosen target language on the shared state and log it to the chat.

    Returns ``(history, "", update_status(), lang)``; the language is echoed
    back as the fourth element.
    """
    state.target_language = lang
    history.append(
        ["Language selection", f"Selection confirmed: 🌍 Language → **{lang}**"]
    )
    return history, "", update_status(), lang
|
| 375 |
+
|
| 376 |
+
|
| 377 |
+
def update_persistent_config(anthropic_key, github_token, github_owner, github_repo, reference_pr_url, history=None):
    """Update persistent configuration settings.

    Args:
        anthropic_key: Anthropic API key. When empty, the stored key and the
            ``ANTHROPIC_API_KEY`` environment variable are left unchanged.
        github_token: GitHub token. Exported as ``GITHUB_TOKEN`` when non-empty.
        github_owner: GitHub owner stored in the GitHub configuration.
        github_repo: Repository name stored in the GitHub configuration.
        reference_pr_url: Reference PR URL. When empty, the selected project's
            configured ``reference_pr_url`` is used if it can be loaded.
        history: Chat history list the confirmation is appended to. Optional,
            so callers without a chat history (such as the legacy
            ``update_github_config`` wrapper) can still call this function;
            a new list is used when omitted.

    Returns:
        ``(history, "", update_status())``.
    """
    if history is None:
        history = []

    # Update API keys
    if anthropic_key:
        state.persistent_settings["anthropic_api_key"] = anthropic_key
        os.environ["ANTHROPIC_API_KEY"] = anthropic_key

    if github_token:
        os.environ["GITHUB_TOKEN"] = github_token

    # Get default reference PR URL from project config if not provided
    if not reference_pr_url and state.selected_project:
        try:
            reference_pr_url = get_project_config(state.selected_project).reference_pr_url
        except Exception as e:
            # Best effort: saving the configuration must not fail because the
            # project default is unavailable. Unlike a bare ``except``, this
            # lets KeyboardInterrupt/SystemExit propagate.
            print(f"Could not load default reference PR URL: {e}")

    # Save GitHub configuration to persistent settings.
    # Note: token/owner/repo are overwritten even when the given value is empty.
    state.persistent_settings["github_config"].update({
        "token": github_token or "",
        "owner": github_owner or "",
        "repo_name": github_repo or "",
        "reference_pr_url": reference_pr_url or "",
    })

    # Build response message based on what was configured
    response = "✅ Configuration saved!"
    if github_owner and github_repo:
        response += f" GitHub: {github_owner}/{github_repo}"
    elif anthropic_key:
        response += " Anthropic API key updated."

    history.append(["Configuration update", response])
    return history, "", update_status()
|
| 414 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 415 |
|
| 416 |
+
def update_github_config(token, owner, repo, reference_pr_url):
    """Legacy function for backward compatibility.

    Delegates to ``update_persistent_config`` with an empty Anthropic key
    (which that function treats as "keep the stored key") and a fresh, empty
    chat history. The history argument was previously omitted, which made
    every call fail with ``TypeError``.

    Returns whatever ``update_persistent_config`` returns.
    """
    return update_persistent_config("", token, owner, repo, reference_pr_url, [])
|
| 419 |
|
| 420 |
|
| 421 |
def update_prompt_preview(language, file_path, additional_instruction):
|
|
|
|
| 431 |
translation_lang = language
|
| 432 |
|
| 433 |
# Get sample content (first 500 characters)
|
| 434 |
+
content = get_content(file_path, state.selected_project)
|
| 435 |
to_translate = preprocess_content(content)
|
| 436 |
|
| 437 |
# Truncate for preview
|
|
|
|
| 442 |
|
| 443 |
return prompt
|
| 444 |
except Exception as e:
|
| 445 |
+
error_str = str(e)
|
| 446 |
+
if "Failed to retrieve content from the URL" in error_str:
|
| 447 |
+
return f"❌ **File not found:** `{file_path}`\n\n💡 **Please check:**\n1. Is this file in the **{state.selected_project}** project?\n2. Use \"🔍 Find Files to Translate\" to see available files\n3. Verify the file path is correct"
|
| 448 |
+
return f"Error generating prompt preview: {error_str}"
|
| 449 |
|
| 450 |
|
| 451 |
def send_message(message, history):
|
|
|
|
| 454 |
|
| 455 |
|
| 456 |
# Button handlers with tab switching
|
| 457 |
+
def start_translate_handler(history, file_to_translate, additional_instruction="", force_retranslate=False):
    """Validate the request, run the translation and report it in the chat.

    The Anthropic key is read from the persistent settings; if it is missing,
    or no file path is given, an error entry is appended to ``history`` and
    the UI components are returned without changes.

    Returns a 6-tuple: ``(history, "", update_status(), gr.Tabs(),
    <start button update>, <confirm button update>)``.
    """

    def _reject(message):
        # Shared early-exit path: log the problem, leave every component as is.
        history.append(["Translation request", message])
        return history, "", update_status(), gr.Tabs(), gr.update(), gr.update()

    # Use persistent anthropic key
    api_key = state.persistent_settings["anthropic_api_key"]
    if not api_key:
        return _reject("❌ Please set Anthropic API key in Configuration panel first.")
    os.environ["ANTHROPIC_API_KEY"] = api_key

    # A file path is mandatory (dropdown selection or typed path)
    if not file_to_translate or not file_to_translate.strip():
        return _reject("❌ Please select a file from the dropdown or enter a file path to translate.")

    state.additional_instruction = additional_instruction
    state.files_to_translate = [file_to_translate]
    state.step = "translate"

    # Start translation directly
    if force_retranslate:
        history.append(["Translation request", "🔄 **Force retranslation started...**"])
    response, translated = start_translation_process(force_retranslate)
    history.append(["", response])
    if translated:
        history.append(["", translated])

    # Button label and confirm-button visibility follow the stored translation
    has_translation = bool(state.current_file_content["translated"])
    button_label = "🔄 Retranslation" if has_translation else "🚀 Start Translation"

    return (
        history,
        "",
        update_status(),
        gr.Tabs(),
        gr.update(value=button_label),
        gr.update(visible=has_translation),
    )
|
| 490 |
|
| 491 |
|
| 492 |
def approve_handler(history, owner, repo, reference_pr_url):
|
|
|
|
| 494 |
global state
|
| 495 |
state.step = "create_github_pr"
|
| 496 |
|
| 497 |
+
# Check all required GitHub configuration at once
|
| 498 |
+
github_config = state.persistent_settings["github_config"]
|
| 499 |
+
missing_config = []
|
| 500 |
+
|
| 501 |
+
if not github_config.get("token"):
|
| 502 |
+
missing_config.append("GitHub Token")
|
| 503 |
+
if not owner:
|
| 504 |
+
missing_config.append("GitHub Owner")
|
| 505 |
+
if not repo:
|
| 506 |
+
missing_config.append("Repository Name")
|
| 507 |
+
|
| 508 |
+
if missing_config:
|
| 509 |
+
config = get_project_config(state.selected_project)
|
| 510 |
+
repo_name = config.repo_url.split('/')[-1] # Extract repo name from URL
|
| 511 |
+
response = f"❌ Please set the following in Configuration panel first: {', '.join(missing_config)}\n\n💡 **Note:** GitHub Owner/Repository should be your fork of [`{repo_name}`]({config.repo_url}) (e.g., Owner: `your-username`, Repository: `{repo_name}`)"
|
| 512 |
history.append(["GitHub PR creation request", response])
|
| 513 |
return history, "", update_status()
|
| 514 |
|
| 515 |
+
# Update reference PR URL (can be set per PR)
|
| 516 |
+
if reference_pr_url:
|
| 517 |
+
state.persistent_settings["github_config"]["reference_pr_url"] = reference_pr_url
|
| 518 |
+
|
| 519 |
+
# Use persistent settings
|
| 520 |
+
github_config = state.persistent_settings["github_config"]
|
| 521 |
+
|
| 522 |
+
# Initialize response variable
|
| 523 |
+
response = ""
|
| 524 |
+
|
| 525 |
# If reference PR is not provided, use the agent to find one
|
| 526 |
if not github_config.get("reference_pr_url"):
|
| 527 |
response = "🤖 **Reference PR URL not found. The agent will now search for a suitable one...**"
|
|
|
|
| 577 |
translated_content=translated_content,
|
| 578 |
github_config=state.github_config,
|
| 579 |
en_title=file_name,
|
| 580 |
+
project=state.selected_project,
|
| 581 |
)
|
| 582 |
response += f"\n{pr_response}"
|
| 583 |
else:
|
|
|
|
| 588 |
|
| 589 |
|
| 590 |
def restart_handler(history):
    """Resets the workflow state but preserves persistent settings.

    The shared ``state`` object is reset in place through
    ``ChatState.reset_transient_state`` instead of being replaced, so any code
    holding a reference to it keeps seeing the current state and the
    persistent settings need no backup/restore step.

    Args:
        history: Current chat history; it is not used, a fresh history
            containing only the welcome message is returned.

    Returns:
        ``(new_history, "", update_status(), gr.Tabs(selected=0))``.
    """
    # Reset workflow attributes only; persistent_settings is left untouched.
    state.reset_transient_state()

    # Re-export the stored credentials to the environment
    settings = state.persistent_settings
    if settings["anthropic_api_key"]:
        os.environ["ANTHROPIC_API_KEY"] = settings["anthropic_api_key"]
    if settings["github_config"]["token"]:
        os.environ["GITHUB_TOKEN"] = settings["github_config"]["token"]

    welcome_msg = get_welcome_message()
    new_hist = [[None, welcome_msg]]
    return new_hist, "", update_status(), gr.Tabs(selected=0)
|
agent/toctree_handler.py
CHANGED
|
@@ -4,9 +4,17 @@ from typing import Dict, List, Any
|
|
| 4 |
import os
|
| 5 |
|
| 6 |
class TocTreeHandler:
|
| 7 |
-
def __init__(self):
|
| 8 |
-
|
| 9 |
-
self.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
self.local_docs_path = "docs/source/ko"
|
| 11 |
|
| 12 |
def fetch_toctree(self, url: str) -> Dict[str, Any]:
|
|
@@ -245,7 +253,8 @@ Korean title:"""
|
|
| 245 |
translation_result: dict,
|
| 246 |
filepath: str,
|
| 247 |
pr_agent,
|
| 248 |
-
github_config: dict
|
|
|
|
| 249 |
) -> dict:
|
| 250 |
"""Update toctree after successful translation PR.
|
| 251 |
|
|
|
|
| 4 |
import os
|
| 5 |
|
| 6 |
class TocTreeHandler:
|
| 7 |
+
def __init__(self, project: str = "transformers"):
    """Bind the handler to a project and derive its toctree URLs.

    Loads the project configuration via ``get_project_config`` and builds the
    raw.githubusercontent.com URLs of the English and Korean ``_toctree.yml``
    files on the ``main`` branch.
    """
    from translator.project_config import get_project_config

    self.project = project
    self.project_config = get_project_config(project)

    # repo_url with the "https://github.com/" prefix removed
    repo_path = self.project_config.repo_url.replace("https://github.com/", "")
    raw_docs_root = f"https://raw.githubusercontent.com/{repo_path}/main/docs/source"

    # Project-specific toctree locations
    self.en_toctree_url = f"{raw_docs_root}/en/_toctree.yml"
    self.ko_toctree_url = f"{raw_docs_root}/ko/_toctree.yml"
    self.local_docs_path = "docs/source/ko"
|
| 19 |
|
| 20 |
def fetch_toctree(self, url: str) -> Dict[str, Any]:
|
|
|
|
| 253 |
translation_result: dict,
|
| 254 |
filepath: str,
|
| 255 |
pr_agent,
|
| 256 |
+
github_config: dict,
|
| 257 |
+
project: str = "transformers"
|
| 258 |
) -> dict:
|
| 259 |
"""Update toctree after successful translation PR.
|
| 260 |
|
agent/workflow.py
CHANGED
|
@@ -11,7 +11,7 @@ from translator.content import (
|
|
| 11 |
llm_translate,
|
| 12 |
preprocess_content,
|
| 13 |
)
|
| 14 |
-
from translator.retriever import report, get_github_issue_open_pr
|
| 15 |
# GitHub PR Agent import
|
| 16 |
try:
|
| 17 |
from pr_generator.agent import GitHubPRAgent
|
|
@@ -26,19 +26,23 @@ from logger.github_logger import GitHubLogger
|
|
| 26 |
|
| 27 |
|
| 28 |
def report_translation_target_files(
|
| 29 |
-
translate_lang: str, top_k: int = 1
|
| 30 |
) -> tuple[str, list[list[str]]]:
|
| 31 |
"""Return the top-k files that need translation, excluding files already in progress.
|
| 32 |
|
| 33 |
Args:
|
|
|
|
| 34 |
translate_lang: Target language to translate
|
| 35 |
top_k: Number of top-first files to return for translation. (Default 1)
|
| 36 |
"""
|
| 37 |
-
# Get files
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
# Get all available files for translation
|
| 41 |
-
all_status_report, all_filepath_list = report(translate_lang, top_k * 2) # Get more to account for filtering
|
|
|
|
|
|
|
|
|
|
| 42 |
|
| 43 |
# Filter out files that are already in progress
|
| 44 |
available_files = [f for f in all_filepath_list if f not in docs_in_progress]
|
|
@@ -52,29 +56,30 @@ def report_translation_target_files(
|
|
| 52 |
if docs_in_progress:
|
| 53 |
status_report += f"\n\n🤖 Found {len(docs_in_progress)} files in progress for translation:"
|
| 54 |
for i, file in enumerate(docs_in_progress):
|
| 55 |
-
status_report += f"\n{i+1}. `{file}
|
| 56 |
status_report += f"\n\n📋 Showing {len(filepath_list)} available files (excluding in-progress):"
|
| 57 |
|
| 58 |
return status_report, [[file] for file in filepath_list]
|
| 59 |
|
| 60 |
|
| 61 |
-
def translate_docs(lang: str, file_path: str, additional_instruction: str = "") -> tuple[str, str]:
|
| 62 |
"""Translate documentation."""
|
| 63 |
-
# Check if translation already exists
|
| 64 |
translation_file_path = (
|
| 65 |
Path(__file__).resolve().parent.parent
|
| 66 |
/ f"translation_result/{file_path}"
|
| 67 |
)
|
| 68 |
|
| 69 |
-
if translation_file_path.exists():
|
| 70 |
print(f"📄 Found existing translation: {translation_file_path}")
|
| 71 |
with open(translation_file_path, "r", encoding="utf-8") as f:
|
| 72 |
existing_content = f.read()
|
| 73 |
if existing_content.strip():
|
| 74 |
-
|
|
|
|
| 75 |
|
| 76 |
# step 1. Get content from file path
|
| 77 |
-
content = get_content(file_path)
|
| 78 |
to_translate = preprocess_content(content)
|
| 79 |
|
| 80 |
# step 2. Prepare prompt with docs content
|
|
@@ -97,7 +102,7 @@ def translate_docs(lang: str, file_path: str, additional_instruction: str = "")
|
|
| 97 |
|
| 98 |
|
| 99 |
def translate_docs_interactive(
|
| 100 |
-
translate_lang: str, selected_files: list[list[str]], additional_instruction: str = ""
|
| 101 |
) -> tuple[str, str]:
|
| 102 |
"""Interactive translation function that processes files one by one.
|
| 103 |
|
|
@@ -111,14 +116,22 @@ def translate_docs_interactive(
|
|
| 111 |
# Start with the first file
|
| 112 |
current_file = file_paths[0]
|
| 113 |
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 117 |
|
| 118 |
print(callback_result)
|
| 119 |
print(status)
|
| 120 |
|
| 121 |
-
return translated_content
|
| 122 |
|
| 123 |
|
| 124 |
def generate_github_pr(
|
|
@@ -127,6 +140,7 @@ def generate_github_pr(
|
|
| 127 |
translated_content: str = None,
|
| 128 |
github_config: dict = None,
|
| 129 |
en_title: str = None,
|
|
|
|
| 130 |
) -> str:
|
| 131 |
"""Generate a GitHub PR for translated documentation.
|
| 132 |
|
|
@@ -144,7 +158,7 @@ def generate_github_pr(
|
|
| 144 |
return "❌ GitHub PR Agent is not available. Please install required libraries."
|
| 145 |
|
| 146 |
if not github_config:
|
| 147 |
-
return "❌ GitHub configuration not provided."
|
| 148 |
|
| 149 |
# Validate required configuration
|
| 150 |
required_fields = ["token", "owner", "repo_name", "reference_pr_url"]
|
|
@@ -153,7 +167,7 @@ def generate_github_pr(
|
|
| 153 |
]
|
| 154 |
|
| 155 |
if missing_fields:
|
| 156 |
-
return f"❌ Missing required configuration: {', '.join(missing_fields)}
|
| 157 |
|
| 158 |
# Set token in environment for the agent.
|
| 159 |
os.environ["GITHUB_TOKEN"] = github_config["token"]
|
|
@@ -166,29 +180,39 @@ def generate_github_pr(
|
|
| 166 |
/ f"translation_result/{filepath}"
|
| 167 |
)
|
| 168 |
if not translation_file_path.exists():
|
| 169 |
-
return f"❌ Translation file not found: {translation_file_path}"
|
| 170 |
|
| 171 |
with open(translation_file_path, "r", encoding="utf-8") as f:
|
| 172 |
translated_content = f.read()
|
| 173 |
|
| 174 |
if not translated_content or not translated_content.strip():
|
| 175 |
-
return "❌ Translated content is empty."
|
| 176 |
|
| 177 |
# Execute GitHub PR Agent
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 178 |
print(f"🚀 Starting GitHub PR creation...")
|
| 179 |
print(f" 📁 File: {filepath}")
|
| 180 |
print(f" 🌍 Language: {target_language}")
|
| 181 |
print(f" 📊 Reference PR: {github_config['reference_pr_url']}")
|
| 182 |
-
print(f" 🏠
|
| 183 |
-
|
| 184 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 185 |
result = agent.run_translation_pr_workflow(
|
| 186 |
reference_pr_url=github_config["reference_pr_url"],
|
| 187 |
target_language=target_language,
|
| 188 |
filepath=filepath,
|
| 189 |
translated_doc=translated_content,
|
| 190 |
-
owner=github_config["owner"],
|
| 191 |
-
repo_name=github_config["repo_name"],
|
| 192 |
base_branch=github_config.get("base_branch", "main"),
|
| 193 |
)
|
| 194 |
# TEST CODE
|
|
@@ -202,9 +226,9 @@ def generate_github_pr(
|
|
| 202 |
toctree_result = None
|
| 203 |
if en_title:
|
| 204 |
from agent.toctree_handler import TocTreeHandler
|
| 205 |
-
toctree_handler = TocTreeHandler()
|
| 206 |
toctree_result = toctree_handler.update_toctree_after_translation(
|
| 207 |
-
result, filepath, agent, github_config
|
| 208 |
)
|
| 209 |
|
| 210 |
# Process result
|
|
@@ -248,13 +272,29 @@ def generate_github_pr(
|
|
| 248 |
{result.get("error_details", "Unknown error")}"""
|
| 249 |
|
| 250 |
else:
|
|
|
|
| 251 |
return f"""❌ **GitHub PR Creation Failed**
|
| 252 |
|
| 253 |
**Error Message:**
|
| 254 |
-
{result["message"]}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 255 |
|
| 256 |
except Exception as e:
|
| 257 |
-
error_msg = f"❌ Unexpected
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 258 |
print(error_msg)
|
| 259 |
return error_msg
|
| 260 |
|
|
|
|
| 11 |
llm_translate,
|
| 12 |
preprocess_content,
|
| 13 |
)
|
| 14 |
+
from translator.retriever import report, get_github_issue_open_pr, get_github_repo_files
|
| 15 |
# GitHub PR Agent import
|
| 16 |
try:
|
| 17 |
from pr_generator.agent import GitHubPRAgent
|
|
|
|
| 26 |
|
| 27 |
|
| 28 |
def report_translation_target_files(
|
| 29 |
+
project: str, translate_lang: str, top_k: int = 1
|
| 30 |
) -> tuple[str, list[list[str]]]:
|
| 31 |
"""Return the top-k files that need translation, excluding files already in progress.
|
| 32 |
|
| 33 |
Args:
|
| 34 |
+
project: Project to translate (e.g., "transformers", "smolagents")
|
| 35 |
translate_lang: Target language to translate
|
| 36 |
top_k: Number of top-first files to return for translation. (Default 1)
|
| 37 |
"""
|
| 38 |
+
# Get repo files once to avoid duplicate API calls
|
| 39 |
+
all_repo_files = get_github_repo_files(project)
|
| 40 |
+
|
| 41 |
+
# Get all available files for translation using the file list
|
| 42 |
+
all_status_report, all_filepath_list = report(project, translate_lang, top_k * 2, all_repo_files) # Get more to account for filtering
|
| 43 |
+
|
| 44 |
+
# Get files in progress using the same file list
|
| 45 |
+
docs_in_progress, pr_info_list = get_github_issue_open_pr(project, translate_lang, all_repo_files)
|
| 46 |
|
| 47 |
# Filter out files that are already in progress
|
| 48 |
available_files = [f for f in all_filepath_list if f not in docs_in_progress]
|
|
|
|
| 56 |
if docs_in_progress:
|
| 57 |
status_report += f"\n\n🤖 Found {len(docs_in_progress)} files in progress for translation:"
|
| 58 |
for i, file in enumerate(docs_in_progress):
|
| 59 |
+
status_report += f"\n{i+1}. [`{file}`]({pr_info_list[i]})"
|
| 60 |
status_report += f"\n\n📋 Showing {len(filepath_list)} available files (excluding in-progress):"
|
| 61 |
|
| 62 |
return status_report, [[file] for file in filepath_list]
|
| 63 |
|
| 64 |
|
| 65 |
+
def translate_docs(lang: str, file_path: str, additional_instruction: str = "", project: str = "transformers", force_retranslate: bool = False) -> tuple[str, str]:
|
| 66 |
"""Translate documentation."""
|
| 67 |
+
# Check if translation already exists (unless force retranslate is enabled)
|
| 68 |
translation_file_path = (
|
| 69 |
Path(__file__).resolve().parent.parent
|
| 70 |
/ f"translation_result/{file_path}"
|
| 71 |
)
|
| 72 |
|
| 73 |
+
if not force_retranslate and translation_file_path.exists():
|
| 74 |
print(f"📄 Found existing translation: {translation_file_path}")
|
| 75 |
with open(translation_file_path, "r", encoding="utf-8") as f:
|
| 76 |
existing_content = f.read()
|
| 77 |
if existing_content.strip():
|
| 78 |
+
existing_msg = f"♻️ **Existing translation loaded** (no tokens used)\n📁 **File:** `{file_path}`\n📅 **Loaded from:** `{translation_file_path}`\n💡 **To retranslate:** Check 'Force Retranslate' option."
|
| 79 |
+
return existing_msg, existing_content
|
| 80 |
|
| 81 |
# step 1. Get content from file path
|
| 82 |
+
content = get_content(file_path, project)
|
| 83 |
to_translate = preprocess_content(content)
|
| 84 |
|
| 85 |
# step 2. Prepare prompt with docs content
|
|
|
|
| 102 |
|
| 103 |
|
| 104 |
def translate_docs_interactive(
|
| 105 |
+
translate_lang: str, selected_files: list[list[str]], additional_instruction: str = "", project: str = "transformers", force_retranslate: bool = False
|
| 106 |
) -> tuple[str, str]:
|
| 107 |
"""Interactive translation function that processes files one by one.
|
| 108 |
|
|
|
|
| 116 |
# Start with the first file
|
| 117 |
current_file = file_paths[0]
|
| 118 |
|
| 119 |
+
callback_result, translated_content = translate_docs(translate_lang, current_file, additional_instruction, project, force_retranslate)
|
| 120 |
+
|
| 121 |
+
# Check if existing translation was loaded
|
| 122 |
+
if isinstance(callback_result, str) and "Existing translation loaded" in callback_result:
|
| 123 |
+
status = callback_result # Use the existing translation message
|
| 124 |
+
else:
|
| 125 |
+
if force_retranslate:
|
| 126 |
+
status = f"🔄 **Force Retranslation completed**: `{current_file}` → `{translate_lang}`\n\n"
|
| 127 |
+
else:
|
| 128 |
+
status = f"✅ Translation completed: `{current_file}` → `{translate_lang}`\n\n"
|
| 129 |
+
status += f"💰 Used token and cost: \n```\n{callback_result}\n```"
|
| 130 |
|
| 131 |
print(callback_result)
|
| 132 |
print(status)
|
| 133 |
|
| 134 |
+
return status, translated_content
|
| 135 |
|
| 136 |
|
| 137 |
def generate_github_pr(
|
|
|
|
| 140 |
translated_content: str = None,
|
| 141 |
github_config: dict = None,
|
| 142 |
en_title: str = None,
|
| 143 |
+
project: str = "transformers",
|
| 144 |
) -> str:
|
| 145 |
"""Generate a GitHub PR for translated documentation.
|
| 146 |
|
|
|
|
| 158 |
return "❌ GitHub PR Agent is not available. Please install required libraries."
|
| 159 |
|
| 160 |
if not github_config:
|
| 161 |
+
return "❌ GitHub configuration not provided. Please set up GitHub token, owner, and repository in Configuration panel."
|
| 162 |
|
| 163 |
# Validate required configuration
|
| 164 |
required_fields = ["token", "owner", "repo_name", "reference_pr_url"]
|
|
|
|
| 167 |
]
|
| 168 |
|
| 169 |
if missing_fields:
|
| 170 |
+
return f"❌ Missing required GitHub configuration: {', '.join(missing_fields)}\n\n💡 Go to Configuration panel and set:\n" + "\n".join([f" • {field}" for field in missing_fields])
|
| 171 |
|
| 172 |
# Set token in environment for the agent.
|
| 173 |
os.environ["GITHUB_TOKEN"] = github_config["token"]
|
|
|
|
| 180 |
/ f"translation_result/{filepath}"
|
| 181 |
)
|
| 182 |
if not translation_file_path.exists():
|
| 183 |
+
return f"❌ Translation file not found: {translation_file_path}\n\n💡 Please complete translation first in Tab 2 for file: {filepath}"
|
| 184 |
|
| 185 |
with open(translation_file_path, "r", encoding="utf-8") as f:
|
| 186 |
translated_content = f.read()
|
| 187 |
|
| 188 |
if not translated_content or not translated_content.strip():
|
| 189 |
+
return f"❌ Translated content is empty for file: {filepath}\n\n💡 Please complete translation first in Tab 2."
|
| 190 |
|
| 191 |
# Execute GitHub PR Agent
|
| 192 |
+
# Get base repository from project config
|
| 193 |
+
from translator.project_config import get_project_config
|
| 194 |
+
project_config = get_project_config(project)
|
| 195 |
+
base_repo_path = project_config.repo_url.replace("https://github.com/", "")
|
| 196 |
+
base_owner, base_repo = base_repo_path.split("/")
|
| 197 |
+
|
| 198 |
print(f"🚀 Starting GitHub PR creation...")
|
| 199 |
print(f" 📁 File: {filepath}")
|
| 200 |
print(f" 🌍 Language: {target_language}")
|
| 201 |
print(f" 📊 Reference PR: {github_config['reference_pr_url']}")
|
| 202 |
+
print(f" 🏠 User Fork: {github_config['owner']}/{github_config['repo_name']}")
|
| 203 |
+
print(f" 🎯 Base Repository: {base_owner}/{base_repo}")
|
| 204 |
+
|
| 205 |
+
agent = GitHubPRAgent(
|
| 206 |
+
user_owner=github_config["owner"],
|
| 207 |
+
user_repo=github_config["repo_name"],
|
| 208 |
+
base_owner=base_owner,
|
| 209 |
+
base_repo=base_repo,
|
| 210 |
+
)
|
| 211 |
result = agent.run_translation_pr_workflow(
|
| 212 |
reference_pr_url=github_config["reference_pr_url"],
|
| 213 |
target_language=target_language,
|
| 214 |
filepath=filepath,
|
| 215 |
translated_doc=translated_content,
|
|
|
|
|
|
|
| 216 |
base_branch=github_config.get("base_branch", "main"),
|
| 217 |
)
|
| 218 |
# TEST CODE
|
|
|
|
| 226 |
toctree_result = None
|
| 227 |
if en_title:
|
| 228 |
from agent.toctree_handler import TocTreeHandler
|
| 229 |
+
toctree_handler = TocTreeHandler(project)
|
| 230 |
toctree_result = toctree_handler.update_toctree_after_translation(
|
| 231 |
+
result, filepath, agent, github_config, project
|
| 232 |
)
|
| 233 |
|
| 234 |
# Process result
|
|
|
|
| 272 |
{result.get("error_details", "Unknown error")}"""
|
| 273 |
|
| 274 |
else:
|
| 275 |
+
error_details = result.get("error_details", "No additional details")
|
| 276 |
return f"""❌ **GitHub PR Creation Failed**
|
| 277 |
|
| 278 |
**Error Message:**
|
| 279 |
+
{result["message"]}
|
| 280 |
+
|
| 281 |
+
**Error Details:**
|
| 282 |
+
{error_details}
|
| 283 |
+
|
| 284 |
+
💡 **Common Solutions:**
|
| 285 |
+
1. **Project Mismatch**: Selected project '{project}' but fork is '{github_config.get('repo_name', 'REPO')}' - ensure they match
|
| 286 |
+
2. Check if your GitHub fork exists: {github_config.get('owner', 'USER')}/{github_config.get('repo_name', 'REPO')}
|
| 287 |
+
3. Verify GitHub token has write access to your fork"""
|
| 288 |
|
| 289 |
except Exception as e:
|
| 290 |
+
error_msg = f"""❌ **Unexpected Error During PR Creation**
|
| 291 |
+
|
| 292 |
+
**Error:** {str(e)}
|
| 293 |
+
|
| 294 |
+
**Configuration:**
|
| 295 |
+
• Project: {project}
|
| 296 |
+
• File: {filepath}
|
| 297 |
+
• Target: {github_config.get('owner', 'USER')}/{github_config.get('repo_name', 'REPO')} → {base_owner if 'base_owner' in locals() else 'BASE'}/{base_repo if 'base_repo' in locals() else 'REPO'}"""
|
| 298 |
print(error_msg)
|
| 299 |
return error_msg
|
| 300 |
|
app.py
CHANGED
|
@@ -8,17 +8,23 @@ from dotenv import load_dotenv
|
|
| 8 |
|
| 9 |
from agent.handler import (
|
| 10 |
approve_handler,
|
|
|
|
|
|
|
| 11 |
get_welcome_message,
|
| 12 |
process_file_search_handler,
|
| 13 |
restart_handler,
|
| 14 |
send_message,
|
| 15 |
start_translate_handler,
|
| 16 |
sync_language_displays,
|
|
|
|
|
|
|
| 17 |
update_prompt_preview,
|
| 18 |
update_status,
|
| 19 |
update_github_config,
|
|
|
|
| 20 |
)
|
| 21 |
from translator.model import Languages
|
|
|
|
| 22 |
|
| 23 |
load_dotenv()
|
| 24 |
|
|
@@ -111,12 +117,54 @@ with gr.Blocks(
|
|
| 111 |
gr.Markdown("### 🌐 Hugging Face i18n Agent")
|
| 112 |
|
| 113 |
chatbot = gr.Chatbot(
|
| 114 |
-
value=[[None, get_welcome_message()]], scale=1, height=
|
| 115 |
show_copy_button=True
|
| 116 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 117 |
|
| 118 |
# Controller interface
|
| 119 |
with gr.Column(scale=2):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 120 |
# Quick Controller
|
| 121 |
with gr.Column(elem_classes=["control-panel"]):
|
| 122 |
gr.Markdown("### 🛠️ Quick Controls")
|
|
@@ -125,6 +173,11 @@ with gr.Blocks(
|
|
| 125 |
with gr.Tabs(elem_classes="simple-tabs") as control_tabs:
|
| 126 |
with gr.TabItem("1. Find Files", id=0):
|
| 127 |
with gr.Group():
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 128 |
lang_dropdown = gr.Radio(
|
| 129 |
choices=[language.value for language in Languages],
|
| 130 |
label="🌍 Translate To",
|
|
@@ -139,6 +192,11 @@ with gr.Blocks(
|
|
| 139 |
"🔍 Find Files to Translate",
|
| 140 |
elem_classes="action-button",
|
| 141 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 142 |
|
| 143 |
with gr.TabItem("2. Translate", id=1):
|
| 144 |
with gr.Group():
|
|
@@ -159,19 +217,19 @@ with gr.Blocks(
|
|
| 159 |
value="ko",
|
| 160 |
interactive=False,
|
| 161 |
)
|
| 162 |
-
anthropic_key = gr.Textbox(
|
| 163 |
-
label="🔑 Anthropic API key for translation generation",
|
| 164 |
-
type="password",
|
| 165 |
-
)
|
| 166 |
additional_instruction = gr.Textbox(
|
| 167 |
label="📝 Additional instructions (Optional - e.g., custom glossary)",
|
| 168 |
placeholder="Example: Translate 'model' as '모델' consistently",
|
| 169 |
lines=2,
|
| 170 |
)
|
| 171 |
|
| 172 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 173 |
prompt_preview = gr.Textbox(
|
| 174 |
-
label="Current Translation Prompt",
|
| 175 |
lines=8,
|
| 176 |
interactive=False,
|
| 177 |
placeholder="Select a file and language to see the prompt preview...",
|
|
@@ -181,29 +239,18 @@ with gr.Blocks(
|
|
| 181 |
start_translate_btn = gr.Button(
|
| 182 |
"🚀 Start Translation", elem_classes="action-button"
|
| 183 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 184 |
|
| 185 |
with gr.TabItem("3. Upload PR", id=2):
|
| 186 |
with gr.Group():
|
| 187 |
-
github_token = gr.Textbox(
|
| 188 |
-
label="🔑 GitHub Token",
|
| 189 |
-
type="password",
|
| 190 |
-
placeholder="ghp_xxxxxxxxxxxxxxxxxxxx",
|
| 191 |
-
)
|
| 192 |
-
github_owner = gr.Textbox(
|
| 193 |
-
label="👤 GitHub Owner/Username",
|
| 194 |
-
placeholder="your-username",
|
| 195 |
-
)
|
| 196 |
-
github_repo = gr.Textbox(
|
| 197 |
-
label="📁 Repository Name",
|
| 198 |
-
placeholder="your-repository",
|
| 199 |
-
)
|
| 200 |
reference_pr_url = gr.Textbox(
|
| 201 |
-
label="🔗 Reference PR URL (Optional
|
| 202 |
-
placeholder="
|
| 203 |
-
)
|
| 204 |
-
|
| 205 |
-
save_config_btn = gr.Button(
|
| 206 |
-
"💾 Save GitHub Config", elem_classes="action-button"
|
| 207 |
)
|
| 208 |
approve_btn = gr.Button(
|
| 209 |
"✅ Generate GitHub PR", elem_classes="action-button"
|
|
@@ -212,29 +259,38 @@ with gr.Blocks(
|
|
| 212 |
"🔄 Restart Translation", elem_classes="action-button"
|
| 213 |
)
|
| 214 |
|
| 215 |
-
# Chat Controller
|
| 216 |
-
with gr.Column(elem_classes=["control-panel"]):
|
| 217 |
-
gr.Markdown("### 💬 Chat with agent (Only simple chat is available)")
|
| 218 |
-
msg_input = gr.Textbox(
|
| 219 |
-
placeholder="Type your message here... (e.g. 'what', 'how', or 'help')",
|
| 220 |
-
container=False,
|
| 221 |
-
scale=4,
|
| 222 |
-
)
|
| 223 |
-
send_btn = gr.Button("Send", scale=1, elem_classes="action-button")
|
| 224 |
-
|
| 225 |
# Event Handlers
|
| 226 |
|
| 227 |
find_btn.click(
|
| 228 |
fn=process_file_search_handler,
|
| 229 |
-
inputs=[lang_dropdown, k_input, chatbot],
|
| 230 |
outputs=[chatbot, msg_input, status_display, control_tabs, files_to_translate],
|
| 231 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 232 |
|
| 233 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 234 |
lang_dropdown.change(
|
| 235 |
-
fn=
|
| 236 |
-
inputs=[lang_dropdown],
|
| 237 |
-
outputs=[translate_lang_display],
|
| 238 |
)
|
| 239 |
|
| 240 |
#
|
|
@@ -247,20 +303,26 @@ with gr.Blocks(
|
|
| 247 |
# Button event handlers
|
| 248 |
start_translate_btn.click(
|
| 249 |
fn=start_translate_handler,
|
| 250 |
-
inputs=[chatbot,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 251 |
outputs=[chatbot, msg_input, status_display, control_tabs],
|
| 252 |
)
|
| 253 |
|
| 254 |
-
#
|
| 255 |
save_config_btn.click(
|
| 256 |
-
fn=
|
| 257 |
-
inputs=[
|
| 258 |
-
outputs=[msg_input],
|
| 259 |
)
|
| 260 |
|
| 261 |
approve_btn.click(
|
| 262 |
fn=approve_handler,
|
| 263 |
-
inputs=[chatbot,
|
| 264 |
outputs=[chatbot, msg_input, status_display],
|
| 265 |
)
|
| 266 |
|
|
|
|
| 8 |
|
| 9 |
from agent.handler import (
|
| 10 |
approve_handler,
|
| 11 |
+
confirm_and_go_translate_handler,
|
| 12 |
+
confirm_translation_and_go_upload_handler,
|
| 13 |
get_welcome_message,
|
| 14 |
process_file_search_handler,
|
| 15 |
restart_handler,
|
| 16 |
send_message,
|
| 17 |
start_translate_handler,
|
| 18 |
sync_language_displays,
|
| 19 |
+
update_language_selection,
|
| 20 |
+
update_project_selection,
|
| 21 |
update_prompt_preview,
|
| 22 |
update_status,
|
| 23 |
update_github_config,
|
| 24 |
+
update_persistent_config,
|
| 25 |
)
|
| 26 |
from translator.model import Languages
|
| 27 |
+
from translator.project_config import get_available_projects
|
| 28 |
|
| 29 |
load_dotenv()
|
| 30 |
|
|
|
|
| 117 |
gr.Markdown("### 🌐 Hugging Face i18n Agent")
|
| 118 |
|
| 119 |
chatbot = gr.Chatbot(
|
| 120 |
+
value=[[None, get_welcome_message()]], scale=1, height=525,
|
| 121 |
show_copy_button=True
|
| 122 |
)
|
| 123 |
+
|
| 124 |
+
# Chat input directly under main chat
|
| 125 |
+
gr.Markdown("### 💬 Chat with agent")
|
| 126 |
+
with gr.Row():
|
| 127 |
+
msg_input = gr.Textbox(
|
| 128 |
+
placeholder="Type your message here... (e.g. 'what', 'how', or 'help')",
|
| 129 |
+
container=False,
|
| 130 |
+
scale=4,
|
| 131 |
+
)
|
| 132 |
+
send_btn = gr.Button("Send", scale=1, elem_classes="action-button")
|
| 133 |
|
| 134 |
# Controller interface
|
| 135 |
with gr.Column(scale=2):
|
| 136 |
+
# Configuration Panel
|
| 137 |
+
with gr.Column(elem_classes=["control-panel"]):
|
| 138 |
+
gr.Markdown("### ⚙️ Configuration")
|
| 139 |
+
|
| 140 |
+
with gr.Accordion("🔧 API & GitHub Settings", open=True):
|
| 141 |
+
config_anthropic_key = gr.Textbox(
|
| 142 |
+
label="🔑 Anthropic API Key",
|
| 143 |
+
type="password",
|
| 144 |
+
placeholder="sk-ant-...",
|
| 145 |
+
)
|
| 146 |
+
config_github_token = gr.Textbox(
|
| 147 |
+
label="🔑 GitHub Token (Required for PR, Optional for file search)",
|
| 148 |
+
type="password",
|
| 149 |
+
placeholder="ghp_...",
|
| 150 |
+
)
|
| 151 |
+
|
| 152 |
+
with gr.Row():
|
| 153 |
+
config_github_owner = gr.Textbox(
|
| 154 |
+
label="👤 GitHub Owner",
|
| 155 |
+
placeholder="your-username",
|
| 156 |
+
scale=1,
|
| 157 |
+
)
|
| 158 |
+
config_github_repo = gr.Textbox(
|
| 159 |
+
label="📁 Repository Name",
|
| 160 |
+
placeholder="your-repository",
|
| 161 |
+
scale=1,
|
| 162 |
+
)
|
| 163 |
+
|
| 164 |
+
save_config_btn = gr.Button(
|
| 165 |
+
"💾 Save Configuration", elem_classes="action-button"
|
| 166 |
+
)
|
| 167 |
+
|
| 168 |
# Quick Controller
|
| 169 |
with gr.Column(elem_classes=["control-panel"]):
|
| 170 |
gr.Markdown("### 🛠️ Quick Controls")
|
|
|
|
| 173 |
with gr.Tabs(elem_classes="simple-tabs") as control_tabs:
|
| 174 |
with gr.TabItem("1. Find Files", id=0):
|
| 175 |
with gr.Group():
|
| 176 |
+
project_dropdown = gr.Radio(
|
| 177 |
+
choices=get_available_projects(),
|
| 178 |
+
label="🎯 Select Project",
|
| 179 |
+
value="transformers",
|
| 180 |
+
)
|
| 181 |
lang_dropdown = gr.Radio(
|
| 182 |
choices=[language.value for language in Languages],
|
| 183 |
label="🌍 Translate To",
|
|
|
|
| 192 |
"🔍 Find Files to Translate",
|
| 193 |
elem_classes="action-button",
|
| 194 |
)
|
| 195 |
+
|
| 196 |
+
confirm_go_btn = gr.Button(
|
| 197 |
+
"✅ Confirm Selection & Go to Translate",
|
| 198 |
+
elem_classes="action-button",
|
| 199 |
+
)
|
| 200 |
|
| 201 |
with gr.TabItem("2. Translate", id=1):
|
| 202 |
with gr.Group():
|
|
|
|
| 217 |
value="ko",
|
| 218 |
interactive=False,
|
| 219 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 220 |
additional_instruction = gr.Textbox(
|
| 221 |
label="📝 Additional instructions (Optional - e.g., custom glossary)",
|
| 222 |
placeholder="Example: Translate 'model' as '모델' consistently",
|
| 223 |
lines=2,
|
| 224 |
)
|
| 225 |
|
| 226 |
+
force_retranslate = gr.Checkbox(
|
| 227 |
+
label="🔄 Force Retranslate (ignore existing translations)",
|
| 228 |
+
value=False,
|
| 229 |
+
)
|
| 230 |
+
|
| 231 |
+
with gr.Accordion("🔍 Preview Translation Prompt", open=False):
|
| 232 |
prompt_preview = gr.Textbox(
|
|
|
|
| 233 |
lines=8,
|
| 234 |
interactive=False,
|
| 235 |
placeholder="Select a file and language to see the prompt preview...",
|
|
|
|
| 239 |
start_translate_btn = gr.Button(
|
| 240 |
"🚀 Start Translation", elem_classes="action-button"
|
| 241 |
)
|
| 242 |
+
|
| 243 |
+
confirm_upload_btn = gr.Button(
|
| 244 |
+
"✅ Confirm Translation & Upload PR",
|
| 245 |
+
elem_classes="action-button",
|
| 246 |
+
visible=False,
|
| 247 |
+
)
|
| 248 |
|
| 249 |
with gr.TabItem("3. Upload PR", id=2):
|
| 250 |
with gr.Group():
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 251 |
reference_pr_url = gr.Textbox(
|
| 252 |
+
label="🔗 Reference PR URL (Optional)",
|
| 253 |
+
placeholder="Auto-filled based on project selection",
|
|
|
|
|
|
|
|
|
|
|
|
|
| 254 |
)
|
| 255 |
approve_btn = gr.Button(
|
| 256 |
"✅ Generate GitHub PR", elem_classes="action-button"
|
|
|
|
| 259 |
"🔄 Restart Translation", elem_classes="action-button"
|
| 260 |
)
|
| 261 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 262 |
# Event Handlers
|
| 263 |
|
| 264 |
find_btn.click(
|
| 265 |
fn=process_file_search_handler,
|
| 266 |
+
inputs=[project_dropdown, lang_dropdown, k_input, chatbot],
|
| 267 |
outputs=[chatbot, msg_input, status_display, control_tabs, files_to_translate],
|
| 268 |
)
|
| 269 |
+
|
| 270 |
+
confirm_go_btn.click(
|
| 271 |
+
fn=confirm_and_go_translate_handler,
|
| 272 |
+
inputs=[chatbot],
|
| 273 |
+
outputs=[chatbot, msg_input, status_display, control_tabs],
|
| 274 |
+
)
|
| 275 |
|
| 276 |
+
# Auto-save selections to state and update prompt preview
|
| 277 |
+
project_dropdown.change(
|
| 278 |
+
fn=update_project_selection,
|
| 279 |
+
inputs=[project_dropdown, chatbot],
|
| 280 |
+
outputs=[chatbot, msg_input, status_display],
|
| 281 |
+
)
|
| 282 |
+
|
| 283 |
+
# Update prompt preview when project changes
|
| 284 |
+
project_dropdown.change(
|
| 285 |
+
fn=update_prompt_preview,
|
| 286 |
+
inputs=[translate_lang_display, file_to_translate_input, additional_instruction],
|
| 287 |
+
outputs=[prompt_preview],
|
| 288 |
+
)
|
| 289 |
+
|
| 290 |
lang_dropdown.change(
|
| 291 |
+
fn=update_language_selection,
|
| 292 |
+
inputs=[lang_dropdown, chatbot],
|
| 293 |
+
outputs=[chatbot, msg_input, status_display, translate_lang_display],
|
| 294 |
)
|
| 295 |
|
| 296 |
#
|
|
|
|
| 303 |
# Button event handlers
|
| 304 |
start_translate_btn.click(
|
| 305 |
fn=start_translate_handler,
|
| 306 |
+
inputs=[chatbot, file_to_translate_input, additional_instruction, force_retranslate],
|
| 307 |
+
outputs=[chatbot, msg_input, status_display, control_tabs, start_translate_btn, confirm_upload_btn],
|
| 308 |
+
)
|
| 309 |
+
|
| 310 |
+
confirm_upload_btn.click(
|
| 311 |
+
fn=confirm_translation_and_go_upload_handler,
|
| 312 |
+
inputs=[chatbot],
|
| 313 |
outputs=[chatbot, msg_input, status_display, control_tabs],
|
| 314 |
)
|
| 315 |
|
| 316 |
+
# Configuration Save
|
| 317 |
save_config_btn.click(
|
| 318 |
+
fn=update_persistent_config,
|
| 319 |
+
inputs=[config_anthropic_key, config_github_token, config_github_owner, config_github_repo, reference_pr_url, chatbot],
|
| 320 |
+
outputs=[chatbot, msg_input, status_display],
|
| 321 |
)
|
| 322 |
|
| 323 |
approve_btn.click(
|
| 324 |
fn=approve_handler,
|
| 325 |
+
inputs=[chatbot, config_github_owner, config_github_repo, reference_pr_url],
|
| 326 |
outputs=[chatbot, msg_input, status_display],
|
| 327 |
)
|
| 328 |
|
pr_generator/agent.py
CHANGED
|
@@ -34,9 +34,13 @@ except ImportError as e:
|
|
| 34 |
class GitHubPRAgent:
|
| 35 |
"""Agent class for GitHub PR creation"""
|
| 36 |
|
| 37 |
-
def __init__(self):
|
| 38 |
self._github_client = None
|
| 39 |
self._llm = None
|
|
|
|
|
|
|
|
|
|
|
|
|
| 40 |
|
| 41 |
@property
|
| 42 |
def github_client(self) -> Optional[Github]:
|
|
@@ -433,8 +437,6 @@ Please return only the commit message. No other explanation is needed."""
|
|
| 433 |
target_language: str,
|
| 434 |
filepath: str,
|
| 435 |
translated_doc: str,
|
| 436 |
-
owner: str,
|
| 437 |
-
repo_name: str,
|
| 438 |
base_branch: str = "main",
|
| 439 |
) -> Dict[str, Any]:
|
| 440 |
"""Execute translation document PR creation workflow."""
|
|
@@ -458,19 +460,20 @@ Please return only the commit message. No other explanation is needed."""
|
|
| 458 |
)
|
| 459 |
|
| 460 |
# 3. Get main branch SHA from upstream and create branch in fork
|
| 461 |
-
upstream_repo = self.github_client.get_repo(f"
|
| 462 |
main_branch = upstream_repo.get_branch(base_branch)
|
| 463 |
main_sha = main_branch.commit.sha
|
| 464 |
|
| 465 |
print(f"🌿 Creating branch: {branch_name} in fork repository")
|
| 466 |
-
branch_result = self.create_branch(
|
| 467 |
|
| 468 |
# Check branch creation result
|
| 469 |
if branch_result.startswith("ERROR"):
|
| 470 |
return {
|
| 471 |
"status": "error",
|
| 472 |
-
"message": f"Branch creation failed: {branch_result}",
|
| 473 |
"branch": branch_name,
|
|
|
|
| 474 |
}
|
| 475 |
elif branch_result.startswith("WARNING"):
|
| 476 |
print(f"⚠️ {branch_result}")
|
|
@@ -489,8 +492,8 @@ Please return only the commit message. No other explanation is needed."""
|
|
| 489 |
|
| 490 |
print(f"📄 Saving file: {target_filepath}")
|
| 491 |
file_result = self.create_or_update_file(
|
| 492 |
-
|
| 493 |
-
|
| 494 |
target_filepath,
|
| 495 |
commit_message,
|
| 496 |
translated_doc,
|
|
@@ -500,9 +503,10 @@ Please return only the commit message. No other explanation is needed."""
|
|
| 500 |
if not file_result.startswith("SUCCESS"):
|
| 501 |
return {
|
| 502 |
"status": "error",
|
| 503 |
-
"message": "
|
| 504 |
"branch": branch_name,
|
| 505 |
"file_path": target_filepath,
|
|
|
|
| 506 |
}
|
| 507 |
|
| 508 |
print(f"{file_result}")
|
|
@@ -518,11 +522,11 @@ Please return only the commit message. No other explanation is needed."""
|
|
| 518 |
)
|
| 519 |
|
| 520 |
print(f"🔄 Creating PR: {pr_title}")
|
| 521 |
-
print(f" Head: {
|
| 522 |
|
| 523 |
# Create PR from fork to upstream repository
|
| 524 |
pr_result = self.create_pull_request(
|
| 525 |
-
|
| 526 |
)
|
| 527 |
|
| 528 |
if pr_result.startswith("ERROR"):
|
|
@@ -554,7 +558,8 @@ Please return only the commit message. No other explanation is needed."""
|
|
| 554 |
except Exception as e:
|
| 555 |
return {
|
| 556 |
"status": "error",
|
| 557 |
-
"message": f"
|
|
|
|
| 558 |
}
|
| 559 |
|
| 560 |
|
|
|
|
| 34 |
class GitHubPRAgent:
|
| 35 |
"""Agent class for GitHub PR creation"""
|
| 36 |
|
| 37 |
+
def __init__(self, user_owner: str = None, user_repo: str = None, base_owner: str = None, base_repo: str = None):
|
| 38 |
self._github_client = None
|
| 39 |
self._llm = None
|
| 40 |
+
self.user_owner = user_owner
|
| 41 |
+
self.user_repo = user_repo
|
| 42 |
+
self.base_owner = base_owner
|
| 43 |
+
self.base_repo = base_repo
|
| 44 |
|
| 45 |
@property
|
| 46 |
def github_client(self) -> Optional[Github]:
|
|
|
|
| 437 |
target_language: str,
|
| 438 |
filepath: str,
|
| 439 |
translated_doc: str,
|
|
|
|
|
|
|
| 440 |
base_branch: str = "main",
|
| 441 |
) -> Dict[str, Any]:
|
| 442 |
"""Execute translation document PR creation workflow."""
|
|
|
|
| 460 |
)
|
| 461 |
|
| 462 |
# 3. Get main branch SHA from upstream and create branch in fork
|
| 463 |
+
upstream_repo = self.github_client.get_repo(f"{self.base_owner}/{self.base_repo}")
|
| 464 |
main_branch = upstream_repo.get_branch(base_branch)
|
| 465 |
main_sha = main_branch.commit.sha
|
| 466 |
|
| 467 |
print(f"🌿 Creating branch: {branch_name} in fork repository")
|
| 468 |
+
branch_result = self.create_branch(self.user_owner, self.user_repo, branch_name, main_sha)
|
| 469 |
|
| 470 |
# Check branch creation result
|
| 471 |
if branch_result.startswith("ERROR"):
|
| 472 |
return {
|
| 473 |
"status": "error",
|
| 474 |
+
"message": f"Branch creation failed: {branch_result}\n\nTarget: {self.user_owner}/{self.user_repo}\nBranch: {branch_name}\nBase SHA: {main_sha[:8]}",
|
| 475 |
"branch": branch_name,
|
| 476 |
+
"error_details": branch_result,
|
| 477 |
}
|
| 478 |
elif branch_result.startswith("WARNING"):
|
| 479 |
print(f"⚠️ {branch_result}")
|
|
|
|
| 492 |
|
| 493 |
print(f"📄 Saving file: {target_filepath}")
|
| 494 |
file_result = self.create_or_update_file(
|
| 495 |
+
self.user_owner,
|
| 496 |
+
self.user_repo,
|
| 497 |
target_filepath,
|
| 498 |
commit_message,
|
| 499 |
translated_doc,
|
|
|
|
| 503 |
if not file_result.startswith("SUCCESS"):
|
| 504 |
return {
|
| 505 |
"status": "error",
|
| 506 |
+
"message": f"File save failed: {file_result}\n\n🎯 Target: {self.user_owner}/{self.user_repo} (expected: {target_language} fork of {self.base_owner}/{self.base_repo})\n🌿 Branch: {branch_name}\n📁 File: {target_filepath}",
|
| 507 |
"branch": branch_name,
|
| 508 |
"file_path": target_filepath,
|
| 509 |
+
"error_details": file_result,
|
| 510 |
}
|
| 511 |
|
| 512 |
print(f"{file_result}")
|
|
|
|
| 522 |
)
|
| 523 |
|
| 524 |
print(f"🔄 Creating PR: {pr_title}")
|
| 525 |
+
print(f" Head: {self.user_owner}:{branch_name} → Base: {self.base_owner}:{base_branch}")
|
| 526 |
|
| 527 |
# Create PR from fork to upstream repository
|
| 528 |
pr_result = self.create_pull_request(
|
| 529 |
+
self.base_owner, self.base_repo, pr_title, f"{self.user_owner}:{branch_name}", base_branch, pr_body, draft=True
|
| 530 |
)
|
| 531 |
|
| 532 |
if pr_result.startswith("ERROR"):
|
|
|
|
| 558 |
except Exception as e:
|
| 559 |
return {
|
| 560 |
"status": "error",
|
| 561 |
+
"message": f"Workflow execution failed: {str(e)}\n\nConfig: {self.user_owner}/{self.user_repo} → {self.base_owner}/{self.base_repo}\nFile: {filepath if 'filepath' in locals() else 'Unknown'}",
|
| 562 |
+
"error_details": str(e),
|
| 563 |
}
|
| 564 |
|
| 565 |
|
translator/content.py
CHANGED
|
@@ -6,15 +6,18 @@ from langchain.callbacks import get_openai_callback
|
|
| 6 |
from langchain_anthropic import ChatAnthropic
|
| 7 |
|
| 8 |
from translator.prompt_glossary import PROMPT_WITH_GLOSSARY
|
|
|
|
| 9 |
|
| 10 |
|
| 11 |
-
def get_content(filepath: str) -> str:
|
| 12 |
if filepath == "":
|
| 13 |
raise ValueError("No files selected for translation.")
|
| 14 |
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
|
|
|
|
|
|
| 18 |
response = requests.get(url)
|
| 19 |
if response.status_code == 200:
|
| 20 |
content = response.text
|
|
@@ -170,4 +173,4 @@ def llm_translate(to_translate: str) -> tuple[str, str]:
|
|
| 170 |
)
|
| 171 |
ai_message = model.invoke(to_translate)
|
| 172 |
print("cb:", cb)
|
| 173 |
-
return cb, ai_message.content
|
|
|
|
| 6 |
from langchain_anthropic import ChatAnthropic
|
| 7 |
|
| 8 |
from translator.prompt_glossary import PROMPT_WITH_GLOSSARY
|
| 9 |
+
from translator.project_config import get_project_config
|
| 10 |
|
| 11 |
|
| 12 |
+
def get_content(filepath: str, project: str = "transformers") -> str:
|
| 13 |
if filepath == "":
|
| 14 |
raise ValueError("No files selected for translation.")
|
| 15 |
|
| 16 |
+
config = get_project_config(project)
|
| 17 |
+
# Extract repo path from repo_url (e.g., "huggingface/transformers")
|
| 18 |
+
repo_path = config.repo_url.replace("https://github.com/", "")
|
| 19 |
+
|
| 20 |
+
url = f"https://raw.githubusercontent.com/{repo_path}/main/{filepath}"
|
| 21 |
response = requests.get(url)
|
| 22 |
if response.status_code == 200:
|
| 23 |
content = response.text
|
|
|
|
| 173 |
)
|
| 174 |
ai_message = model.invoke(to_translate)
|
| 175 |
print("cb:", cb)
|
| 176 |
+
return str(cb), ai_message.content
|
translator/project_config.py
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Project configuration for different HuggingFace repositories."""
|
| 2 |
+
|
| 3 |
+
from dataclasses import dataclass
|
| 4 |
+
from typing import Dict
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
@dataclass
|
| 8 |
+
class ProjectConfig:
|
| 9 |
+
"""Configuration for a specific HuggingFace project."""
|
| 10 |
+
name: str
|
| 11 |
+
repo_url: str
|
| 12 |
+
api_url: str
|
| 13 |
+
docs_path: str
|
| 14 |
+
github_issues: Dict[str, str] # language -> issue_id
|
| 15 |
+
reference_pr_url: str
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
# Project configurations
|
| 19 |
+
PROJECTS = {
|
| 20 |
+
"transformers": ProjectConfig(
|
| 21 |
+
name="Transformers",
|
| 22 |
+
repo_url="https://github.com/huggingface/transformers",
|
| 23 |
+
api_url="https://api.github.com/repos/huggingface/transformers/git/trees/main?recursive=1",
|
| 24 |
+
docs_path="docs/source",
|
| 25 |
+
github_issues={"ko": "20179"},
|
| 26 |
+
reference_pr_url="https://github.com/huggingface/transformers/pull/24968"
|
| 27 |
+
),
|
| 28 |
+
"smolagents": ProjectConfig(
|
| 29 |
+
name="SmolAgents",
|
| 30 |
+
repo_url="https://github.com/huggingface/smolagents",
|
| 31 |
+
api_url="https://api.github.com/repos/huggingface/smolagents/git/trees/main?recursive=1",
|
| 32 |
+
docs_path="docs/source",
|
| 33 |
+
github_issues={"ko": "20179"}, # To be filled when issue is created
|
| 34 |
+
reference_pr_url="https://github.com/huggingface/smolagents/pull/1581" # To be filled with actual PR URL
|
| 35 |
+
)
|
| 36 |
+
}
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
def get_project_config(project_key: str) -> ProjectConfig:
|
| 40 |
+
"""Get project configuration by key."""
|
| 41 |
+
if project_key not in PROJECTS:
|
| 42 |
+
raise ValueError(f"Unknown project: {project_key}. Available: {list(PROJECTS.keys())}")
|
| 43 |
+
return PROJECTS[project_key]
|
| 44 |
+
|
| 45 |
+
|
| 46 |
+
def get_available_projects() -> list[str]:
|
| 47 |
+
"""Get list of available project keys."""
|
| 48 |
+
return list(PROJECTS.keys())
|
translator/retriever.py
CHANGED
|
@@ -5,15 +5,26 @@ from pathlib import Path
|
|
| 5 |
import requests
|
| 6 |
|
| 7 |
from .model import Languages, Summary, TranslationDoc
|
|
|
|
| 8 |
|
| 9 |
-
URL = "https://api.github.com/repos/huggingface/transformers/git/trees/main?recursive=1"
|
| 10 |
|
| 11 |
-
|
| 12 |
-
def get_github_repo_files():
|
| 13 |
"""
|
| 14 |
Get github repo files
|
| 15 |
"""
|
| 16 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
|
| 18 |
data = response.json()
|
| 19 |
all_items = data.get("tree", [])
|
|
@@ -26,30 +37,42 @@ def get_github_repo_files():
|
|
| 26 |
return file_paths
|
| 27 |
|
| 28 |
|
| 29 |
-
def get_github_issue_open_pr(lang: str = "ko"):
|
| 30 |
"""
|
| 31 |
-
Get open PR in the github issue, filtered by title
|
| 32 |
"""
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
)
|
| 39 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 40 |
headers = {
|
| 41 |
"Accept": "application/vnd.github+json",
|
| 42 |
}
|
| 43 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 44 |
all_open_prs = []
|
| 45 |
page = 1
|
| 46 |
per_page = 100 # Maximum allowed by GitHub API
|
| 47 |
|
| 48 |
while True:
|
| 49 |
-
|
|
|
|
| 50 |
response = requests.get(url, headers=headers)
|
| 51 |
|
| 52 |
-
if response.status_code
|
|
|
|
|
|
|
| 53 |
raise Exception(f"GitHub API error: {response.status_code} {response.text}")
|
| 54 |
|
| 55 |
page_prs = response.json()
|
|
@@ -63,19 +86,61 @@ def get_github_issue_open_pr(lang: str = "ko"):
|
|
| 63 |
if len(page_prs) < per_page:
|
| 64 |
break
|
| 65 |
|
| 66 |
-
filtered_prs = [pr for pr in all_open_prs if
|
| 67 |
|
| 68 |
-
|
|
|
|
| 69 |
|
| 70 |
-
|
| 71 |
-
"
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 79 |
return filenames, pr_info_list
|
| 80 |
|
| 81 |
|
|
@@ -99,11 +164,12 @@ def retrieve(summary: Summary, table_size: int = 10) -> tuple[str, list[str]]:
|
|
| 99 |
return report, first_missing_docs
|
| 100 |
|
| 101 |
|
| 102 |
-
def report(target_lang: str, top_k: int = 1) -> tuple[str, list[str]]:
|
| 103 |
"""
|
| 104 |
Generate a report for the translated docs
|
| 105 |
"""
|
| 106 |
-
docs_file
|
|
|
|
| 107 |
|
| 108 |
base_docs_path = Path("docs/source")
|
| 109 |
en_docs_path = Path("docs/source/en")
|
|
|
|
| 5 |
import requests
|
| 6 |
|
| 7 |
from .model import Languages, Summary, TranslationDoc
|
| 8 |
+
from .project_config import get_project_config
|
| 9 |
|
|
|
|
| 10 |
|
| 11 |
+
def get_github_repo_files(project: str = "transformers"):
|
|
|
|
| 12 |
"""
|
| 13 |
Get github repo files
|
| 14 |
"""
|
| 15 |
+
config = get_project_config(project)
|
| 16 |
+
|
| 17 |
+
# Add GitHub token if available to avoid rate limiting (optional)
|
| 18 |
+
headers = {}
|
| 19 |
+
github_token = os.environ.get("GITHUB_TOKEN")
|
| 20 |
+
if github_token:
|
| 21 |
+
headers["Authorization"] = f"token {github_token}"
|
| 22 |
+
|
| 23 |
+
response = requests.get(config.api_url, headers=headers)
|
| 24 |
+
|
| 25 |
+
# Handle rate limit with helpful message
|
| 26 |
+
if response.status_code == 403 and "rate limit" in response.text.lower():
|
| 27 |
+
raise Exception(f"GitHub API rate limit exceeded. To avoid this, set GITHUB_TOKEN in your environment or provide a GitHub token in the UI. Details: {response.text}")
|
| 28 |
|
| 29 |
data = response.json()
|
| 30 |
all_items = data.get("tree", [])
|
|
|
|
| 37 |
return file_paths
|
| 38 |
|
| 39 |
|
| 40 |
+
def get_github_issue_open_pr(project: str = "transformers", lang: str = "ko", all_files: list = None):
|
| 41 |
"""
|
| 42 |
+
Get open PR in the github issue, filtered by title containing '[i18n-KO]'.
|
| 43 |
"""
|
| 44 |
+
config = get_project_config(project)
|
| 45 |
+
issue_id = config.github_issues.get(lang)
|
| 46 |
+
|
| 47 |
+
# For projects without GitHub issue tracking, still search for PRs
|
| 48 |
+
if not issue_id:
|
| 49 |
+
raise ValueError(f"⚠️ No GitHub issue registered for {project}.")
|
| 50 |
|
| 51 |
+
# Require all_files parameter
|
| 52 |
+
if all_files is None:
|
| 53 |
+
raise ValueError("Repository file list must be provided")
|
| 54 |
+
|
| 55 |
headers = {
|
| 56 |
"Accept": "application/vnd.github+json",
|
| 57 |
}
|
| 58 |
|
| 59 |
+
# Add GitHub token if available to avoid rate limiting (optional)
|
| 60 |
+
github_token = os.environ.get("GITHUB_TOKEN")
|
| 61 |
+
if github_token:
|
| 62 |
+
headers["Authorization"] = f"token {github_token}"
|
| 63 |
+
|
| 64 |
all_open_prs = []
|
| 65 |
page = 1
|
| 66 |
per_page = 100 # Maximum allowed by GitHub API
|
| 67 |
|
| 68 |
while True:
|
| 69 |
+
repo_path = config.repo_url.replace("https://github.com/", "")
|
| 70 |
+
url = f"https://api.github.com/repos/{repo_path}/pulls?state=open&page={page}&per_page={per_page}"
|
| 71 |
response = requests.get(url, headers=headers)
|
| 72 |
|
| 73 |
+
if response.status_code == 403 and "rate limit" in response.text.lower():
|
| 74 |
+
raise Exception(f"GitHub API rate limit exceeded. To avoid this, set GITHUB_TOKEN in your environment or provide a GitHub token in the UI. Details: {response.text}")
|
| 75 |
+
elif response.status_code != 200:
|
| 76 |
raise Exception(f"GitHub API error: {response.status_code} {response.text}")
|
| 77 |
|
| 78 |
page_prs = response.json()
|
|
|
|
| 86 |
if len(page_prs) < per_page:
|
| 87 |
break
|
| 88 |
|
| 89 |
+
filtered_prs = [pr for pr in all_open_prs if "[i18n-KO]" in pr["title"]]
|
| 90 |
|
| 91 |
+
# Pattern to match filenames after "Translated" keyword
|
| 92 |
+
pattern = re.compile(r"Translated\s+(?:`([^`]+)`|(\S+))\s+to")
|
| 93 |
|
| 94 |
+
def find_original_file_path(filename_from_title, all_files):
|
| 95 |
+
"""Find the exact file path from repo files by matching filename"""
|
| 96 |
+
if not filename_from_title:
|
| 97 |
+
return None
|
| 98 |
+
|
| 99 |
+
# Remove .md extension for matching
|
| 100 |
+
base_name = filename_from_title.replace('.md', '')
|
| 101 |
+
|
| 102 |
+
# Look for exact matches in repo files
|
| 103 |
+
for file_path in all_files:
|
| 104 |
+
if file_path.startswith("docs/source/en/") and file_path.endswith(".md"):
|
| 105 |
+
file_base = file_path.split("/")[-1].replace('.md', '')
|
| 106 |
+
if file_base == base_name:
|
| 107 |
+
return file_path
|
| 108 |
+
|
| 109 |
+
# If no exact match, fallback to simple path
|
| 110 |
+
return f"docs/source/en/{filename_from_title}"
|
| 111 |
+
|
| 112 |
+
filenames = []
|
| 113 |
+
pr_info_list = []
|
| 114 |
+
|
| 115 |
+
for pr in filtered_prs:
|
| 116 |
+
match = pattern.search(pr["title"])
|
| 117 |
+
if match:
|
| 118 |
+
# Use group 1 (with backticks) or group 2 (without backticks)
|
| 119 |
+
filename = match.group(1) or match.group(2)
|
| 120 |
+
# Add .md extension if not present
|
| 121 |
+
if not filename.endswith('.md'):
|
| 122 |
+
filename += '.md'
|
| 123 |
+
|
| 124 |
+
# Find the correct file path by matching filename
|
| 125 |
+
correct_path = None
|
| 126 |
+
if filename:
|
| 127 |
+
# Remove .md extension for matching
|
| 128 |
+
base_name = filename.replace('.md', '')
|
| 129 |
+
|
| 130 |
+
# Look for exact matches in repo files
|
| 131 |
+
for file_path in all_files:
|
| 132 |
+
if file_path.startswith("docs/source/en/") and file_path.endswith(".md"):
|
| 133 |
+
file_base = file_path.split("/")[-1].replace('.md', '')
|
| 134 |
+
if file_base == base_name:
|
| 135 |
+
correct_path = file_path
|
| 136 |
+
break
|
| 137 |
+
|
| 138 |
+
# If no exact match, fallback to simple path
|
| 139 |
+
if not correct_path:
|
| 140 |
+
correct_path = f"docs/source/en/{filename}"
|
| 141 |
+
if correct_path:
|
| 142 |
+
filenames.append(correct_path)
|
| 143 |
+
pr_info_list.append(f"{config.repo_url}/pull/{pr['url'].rstrip('/').split('/')[-1]}")
|
| 144 |
return filenames, pr_info_list
|
| 145 |
|
| 146 |
|
|
|
|
| 164 |
return report, first_missing_docs
|
| 165 |
|
| 166 |
|
| 167 |
+
def report(project: str, target_lang: str, top_k: int = 1, docs_file: list = None) -> tuple[str, list[str]]:
|
| 168 |
"""
|
| 169 |
Generate a report for the translated docs
|
| 170 |
"""
|
| 171 |
+
if docs_file is None:
|
| 172 |
+
raise ValueError("Repository file list must be provided")
|
| 173 |
|
| 174 |
base_docs_path = Path("docs/source")
|
| 175 |
en_docs_path = Path("docs/source/en")
|