from transformers import MBartForConditionalGeneration, MBartTokenizer


def transliterate(text, model_name="deshanksuman/swabhashambert50SinhalaTransliteration"):
    # Load model and tokenizer
    tokenizer = MBartTokenizer.from_pretrained(model_name)
    model = MBartForConditionalGeneration.from_pretrained(model_name)

    # Set the language codes
    tokenizer.src_lang = "en_XX"
    tokenizer.tgt_lang = "si_LK"

    # Prepare input
    inputs = tokenizer(
        text,
        return_tensors="pt",
        max_length=128,
        padding="max_length",
        truncation=True
    )

    # Generate output
    outputs = model.generate(
        input_ids=inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        max_length=128,
        num_beams=5,
        early_stopping=True
    )

    return tokenizer.decode(outputs[0], skip_special_tokens=True)


# Example usage
if __name__ == "__main__":
    # Test with example text
    test_text = "mama oyata adare karanawa"
    result = transliterate(test_text)
    print(f"Romanized: {test_text}")
    print(f"Transliterated: {result}")
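Note: setting tokenizer.tgt_lang on its own does not steer decoding; the standard mBART generation recipe forces the target-language token as the first decoded token via forced_bos_token_id. The fine-tuned checkpoint may already produce Sinhala script without it, so treat the snippet below as an optional adjustment, not part of the released example: it swaps the generate call inside transliterate() above for one that forces the "si_LK" token (looked up with convert_tokens_to_ids, a standard tokenizer method).

    # Optional variant of the generate call in transliterate() above.
    # Forcing the Sinhala language token mirrors the usual mBART translation
    # recipe; it may be redundant if the fine-tuned model already emits Sinhala.
    outputs = model.generate(
        input_ids=inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        forced_bos_token_id=tokenizer.convert_tokens_to_ids("si_LK"),
        max_length=128,
        num_beams=5,
        early_stopping=True
    )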