import re
from typing import Any, Dict, Optional, Tuple

import torch
from transformers import pipeline

from config import Config

# Languages accepted by the public API, keyed by ISO 639-1 code.
LANGUAGE_CODES = {
    "en": "English",
    "fr": "French",
    "de": "German",
    "es": "Spanish",
    "pt": "Portuguese",
    "ru": "Russian",
    "ja": "Japanese",
    "ko": "Korean",
    "zh": "Chinese",
    "ar": "Arabic",
    "hi": "Hindi",
    "nl": "Dutch",
    "sv": "Swedish",
    "da": "Danish",
    "no": "Norwegian",
    "fi": "Finnish",
    "pl": "Polish",
    "cs": "Czech",
    "hu": "Hungarian",
    "ro": "Romanian",
    "bg": "Bulgarian",
    "hr": "Croatian",
    "sk": "Slovak",
    "sl": "Slovenian",
    "et": "Estonian",
    "lv": "Latvian",
    "lt": "Lithuanian",
    "mt": "Maltese",
    "el": "Greek",
    "tr": "Turkish",
}

# Helsinki-NLP translation model per target language (source is Italian).
# Codes present in LANGUAGE_CODES but missing here have no model and take
# the "[TRANSLATION NOT AVAILABLE: ...]" path in TranslationService.
HELSINKI_MODELS = {
    "en": "Helsinki-NLP/opus-mt-it-en",
    "fr": "Helsinki-NLP/opus-mt-it-fr",
    "de": "Helsinki-NLP/opus-mt-it-de",
    "es": "Helsinki-NLP/opus-mt-it-es",
    "pt": "Helsinki-NLP/opus-mt-it-pt",
    "ru": "Helsinki-NLP/opus-mt-it-ru",
    "nl": "Helsinki-NLP/opus-mt-it-nl",
    "sv": "Helsinki-NLP/opus-mt-it-sv",
    "da": "Helsinki-NLP/opus-mt-it-da",
    "no": "Helsinki-NLP/opus-mt-it-no",
    "fi": "Helsinki-NLP/opus-mt-it-fi",
    "pl": "Helsinki-NLP/opus-mt-it-pl",
    "cs": "Helsinki-NLP/opus-mt-it-cs",
    "hu": "Helsinki-NLP/opus-mt-it-hu",
    "ro": "Helsinki-NLP/opus-mt-it-ro",
    "bg": "Helsinki-NLP/opus-mt-it-bg",
    "hr": "Helsinki-NLP/opus-mt-it-hr",
    "sk": "Helsinki-NLP/opus-mt-it-sk",
    "sl": "Helsinki-NLP/opus-mt-it-sl",
    "et": "Helsinki-NLP/opus-mt-it-et",
    "lv": "Helsinki-NLP/opus-mt-it-lv",
    "lt": "Helsinki-NLP/opus-mt-it-lt",
    "el": "Helsinki-NLP/opus-mt-it-el",
    "tr": "Helsinki-NLP/opus-mt-it-tr",
}

# Spans that must survive translation verbatim: HTML tags and {brace} fields.
# Scanned in one left-to-right pass so a brace inside a tag stays part of
# that tag instead of being extracted a second time.
_PROTECTED_SPAN_RE = re.compile(r"(?P<tag><[^>]+>)|(?P<brace>\{[^}]+\})")

# Token prefix used for each kind of protected span.
_TOKEN_PREFIXES = {"tag": "HTMLTAG", "brace": "PLACEHOLDER"}


class TranslationService:
    """Translate Italian text into other languages with Helsinki-NLP models.

    One `transformers` translation pipeline is created per target language
    on first use and kept in `self.translators` for later calls.
    """

    def __init__(self, device: str = "cpu"):
        """Create the service.

        Args:
            device: "cuda" selects GPU 0 with float16 weights; any other
                value selects CPU with float32 weights.
        """
        self.device = device
        # Loaded pipelines, keyed by target language code.
        self.translators: Dict[str, Any] = {}

    def _get_translator(self, target_language: str):
        """Return the cached pipeline for `target_language`, loading it once.

        Returns:
            The pipeline, or None when the language has no entry in
            HELSINKI_MODELS or the model failed to load. Failures are not
            cached, so a later call retries the load.
        """
        if target_language in self.translators:
            return self.translators[target_language]

        model_name = HELSINKI_MODELS.get(target_language)
        if model_name is None:
            return None

        use_cuda = self.device == "cuda"
        try:
            translator = pipeline(
                "translation",
                model=model_name,
                device=0 if use_cuda else -1,
                torch_dtype=torch.float16 if use_cuda else torch.float32,
            )
        except Exception as e:
            # Best effort: the caller falls back to a marker string.
            print(f"Modello non disponibile per {target_language}, usando fallback: {e}")
            return None

        self.translators[target_language] = translator
        return translator

    @staticmethod
    def _token_pattern(placeholders: Dict[str, str]) -> "re.Pattern[str]":
        """Compile a regex with one capture group matching any token.

        Tokens are tried longest first so that "HTMLTAG10" is never read as
        "HTMLTAG1" followed by a literal "0". `placeholders` must be
        non-empty.
        """
        tokens = sorted(placeholders, key=len, reverse=True)
        return re.compile("(" + "|".join(re.escape(token) for token in tokens) + ")")

    @staticmethod
    def _placeholder_spacing(
        clean_text: str, placeholders: Dict[str, str]
    ) -> Dict[str, Tuple[bool, bool]]:
        """Record whether each token is flanked by whitespace in `clean_text`.

        Returns:
            A mapping token -> (space_before, space_after), taken from the
            first occurrence of each token.
        """
        if not placeholders:
            return {}

        spacing: Dict[str, Tuple[bool, bool]] = {}
        pattern = TranslationService._token_pattern(placeholders)
        for match in pattern.finditer(clean_text):
            token = match.group(0)
            if token in spacing:
                continue
            start, end = match.span()
            spacing[token] = (
                start > 0 and clean_text[start - 1].isspace(),
                end < len(clean_text) and clean_text[end].isspace(),
            )
        return spacing

    def _extract_placeholders(self, text: str) -> tuple[str, Dict[str, str]]:
        """Replace HTML tags and {brace} fields with opaque tokens.

        Tags become "HTMLTAG<n>" and brace fields "PLACEHOLDER<n>", each
        numbered from 0 in order of appearance.

        Returns:
            A pair (text with tokens, mapping token -> original span).
        """
        placeholders: Dict[str, str] = {}
        counters = {"tag": 0, "brace": 0}

        def _protect(match: "re.Match[str]") -> str:
            kind = match.lastgroup  # "tag" or "brace"
            token = f"{_TOKEN_PREFIXES[kind]}{counters[kind]}"
            counters[kind] += 1
            placeholders[token] = match.group(0)
            return token

        processed_text = _PROTECTED_SPAN_RE.sub(_protect, text)
        return processed_text, placeholders

    def _restore_placeholders(
        self,
        text: str,
        placeholders: Dict[str, str],
        spacing: Optional[Dict[str, Tuple[bool, bool]]] = None,
    ) -> str:
        """Put the original spans back in place of their tokens.

        One literal space on each side of a token is dropped first, since
        the translator may have inserted it. If `spacing` is given (see
        `_placeholder_spacing`), a single space is then re-added on the
        sides where the source text had whitespace, so "Ciao {name}!" does
        not come back as "Hello{name}!". Without `spacing` no space is
        re-added.

        Args:
            text: Translated text containing tokens.
            placeholders: Mapping token -> original span.
            spacing: Optional mapping token -> (space_before, space_after).

        Returns:
            The text with every known token replaced.
        """
        if not placeholders:
            return text

        layout = spacing or {}
        # With a capture group, split() yields literal, token, literal, ...
        segments = self._token_pattern(placeholders).split(text)

        result = segments[0]
        for idx in range(1, len(segments), 2):
            token, following = segments[idx], segments[idx + 1]
            wants_before, wants_after = layout.get(token, (False, False))

            if result.endswith(" "):
                result = result[:-1]
            if following.startswith(" "):
                following = following[1:]

            # Never pad at the very start/end or next to existing whitespace.
            if wants_before and result and not result[-1].isspace():
                result += " "
            result += placeholders[token]
            if wants_after and following and not following[0].isspace():
                result += " "
            result += following
        return result

    def translate_text(self, text: str, target_language: str) -> str:
        """Translate `text` from Italian into `target_language`.

        HTML tags and {brace} fields are shielded from the model and
        restored afterwards.

        Returns:
            The stripped translation; "[TRANSLATION NOT AVAILABLE: ...]" when
            no model can be obtained; "[TRANSLATION ERROR: ...]" when the
            model call fails. Text with nothing to translate (empty,
            whitespace, or placeholders only) is returned stripped without
            calling the model.

        Raises:
            ValueError: If `target_language` is not in LANGUAGE_CODES.
        """
        if target_language not in LANGUAGE_CODES:
            raise ValueError(f"Lingua non supportata: {target_language}")

        clean_text, placeholders = self._extract_placeholders(text)

        translator = self._get_translator(target_language)
        if translator is None:
            # Fallback: hand the untranslated text back inside a marker.
            return f"[TRANSLATION NOT AVAILABLE: {text}]"

        # Nothing but tokens/whitespace left: the model could only add noise.
        residue = clean_text
        if placeholders:
            residue = self._token_pattern(placeholders).sub("", clean_text)
        if not residue.strip():
            return text.strip()

        try:
            result = translator(clean_text, max_length=512)
            if isinstance(result, list) and len(result) > 0:
                translated_text = result[0]['translation_text']
            else:
                translated_text = str(result)
        except Exception as e:
            print(f"Errore durante la traduzione: {e}")
            return f"[TRANSLATION ERROR: {text}]"

        spacing = self._placeholder_spacing(clean_text, placeholders)
        translated_text = self._restore_placeholders(
            translated_text, placeholders, spacing
        )
        return translated_text.strip()

    def translate_locale_object(
        self, locale_data: Dict[str, Any], target_language: str
    ) -> Dict[str, Any]:
        """Translate every top-level string value of a locale mapping.

        Non-string values (including nested containers) are copied through
        unchanged.

        Raises:
            ValueError: If `target_language` is not in LANGUAGE_CODES.
        """
        if target_language not in LANGUAGE_CODES:
            raise ValueError(f"Lingua non supportata: {target_language}")

        return {
            key: (
                self.translate_text(content, target_language)
                if isinstance(content, str)
                else content  # keep the original value when it is not a string
            )
            for key, content in locale_data.items()
        }


# Process-wide service instance, created on first use.
translation_service = None


def get_translation_service():
    """Return the shared TranslationService, creating it from Config once."""
    global translation_service
    if translation_service is None:
        config = Config()
        translation_service = TranslationService(device=config.DEVICE)
    return translation_service


def translate_locale(locale_data: Dict[str, Any], target_language: str) -> Dict[str, Any]:
    """Translate a locale mapping using the shared service."""
    service = get_translation_service()
    return service.translate_locale_object(locale_data, target_language)


def get_supported_languages():
    """Return the language codes that have a translation model configured."""
    return list(HELSINKI_MODELS)