# Sinhala transliteration model, uploaded by deshanksuman (commit c9adcfd, verified).
from transformers import MBartForConditionalGeneration, MBartTokenizer
# (tokenizer, model) pairs keyed by model name, so that repeated calls to
# transliterate() reuse the already-loaded checkpoint instead of reloading it.
_LOADED_MODELS = {}


def _load_model(model_name):
    """Return the (tokenizer, model) pair for *model_name*, loading it on first use."""
    if model_name not in _LOADED_MODELS:
        # Load model and tokenizer
        tokenizer = MBartTokenizer.from_pretrained(model_name)
        model = MBartForConditionalGeneration.from_pretrained(model_name)
        # Set the language codes
        tokenizer.src_lang = "en_XX"
        tokenizer.tgt_lang = "si_LK"
        _LOADED_MODELS[model_name] = (tokenizer, model)
    return _LOADED_MODELS[model_name]


def transliterate(text, model_name="deshanksuman/swabhashambert50SinhalaTransliteration"):
    """Transliterate *text* with an mBART checkpoint and return the decoded string.

    The tokenizer is configured with source language code ``en_XX`` and
    target language code ``si_LK``.

    Args:
        text: Input text. It is truncated to 128 tokens before generation.
        model_name: Name or path of the checkpoint passed to
            ``from_pretrained``. Each checkpoint is loaded once and then
            reused by later calls.

    Returns:
        The first generated sequence, decoded with special tokens removed.
        An empty or whitespace-only string yields ``""`` without loading
        the model.
    """
    # Nothing to transliterate: skip the model load and the beam search.
    if isinstance(text, str) and not text.strip():
        return ""

    tokenizer, model = _load_model(model_name)

    # Prepare input
    inputs = tokenizer(
        text,
        return_tensors="pt",
        max_length=128,
        padding="max_length",
        truncation=True,
    )

    # Generate output
    # NOTE(review): no decoder_start_token_id / forced_bos_token_id is passed
    # here, so the output language presumably comes from the checkpoint's own
    # generation config -- confirm against the model card.
    outputs = model.generate(
        input_ids=inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        max_length=128,
        num_beams=5,
        early_stopping=True,
    )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)
# Example usage
if __name__ == "__main__":
    # Run one sample phrase through the model and show input and output.
    sample = "mama oyata adare karanawa"
    converted = transliterate(sample)
    print(
        f"Romanized: {sample}",
        f"Transliterated: {converted}",
        sep="\n",
    )