---
license: mit
datasets:
- wmt/wmt14
language:
- fr
- en
base_model:
- google-t5/t5-base
---

This model was fine-tuned on 50K French-English sentence pairs from the WMT14 Fr-En dataset.

```python
from transformers import T5Tokenizer, T5ForConditionalGeneration

# Load the fine-tuned model and its tokenizer
model_name = "SynapseQAI/T5-base-wmt14"
tokenizer = T5Tokenizer.from_pretrained(model_name)
model = T5ForConditionalGeneration.from_pretrained(model_name)

# Extra keyword arguments passed to `model.generate` for each named strategy.
# A strategy that is not listed here gets no extra arguments.
GENERATION_STRATEGIES = {
    "beam_search": {"num_beams": 3, "early_stopping": True},
    "top_k_sampling": {"do_sample": True, "top_k": 50},
    "top_p_sampling": {"do_sample": True, "top_p": 0.92},
    "temperature_sampling": {"do_sample": True, "temperature": 0.7},
}


def translate(sentence, strategy="beam_search", max_length=50):
    """Translate a French sentence into English.

    Args:
        sentence: French text to translate.
        strategy: One of "beam_search", "top_k_sampling", "top_p_sampling"
            or "temperature_sampling". Any other value calls `generate`
            with no extra options (greedy decoding by default).
        max_length: Value forwarded to `model.generate` as `max_length`.

    Returns:
        The decoded English translation, with special tokens removed.
    """
    # The task prefix tells T5 which translation direction to perform
    input_text = f"translate French to English: {sentence}"
    input_ids = tokenizer(input_text, return_tensors="pt").input_ids

    generation_kwargs = GENERATION_STRATEGIES.get(strategy, {})
    outputs = model.generate(input_ids, max_length=max_length, **generation_kwargs)

    # Only one input sentence was given, so take the first output sequence
    return tokenizer.decode(outputs[0], skip_special_tokens=True)


# French sentences from easy to advanced
sentences = [
    "Il fait beau aujourd'hui.",
    "J'aime lire des livres et regarder des films pendant mon temps libre.",
    "Si j'avais su que tu venais, j'aurais préparé quelque chose de spécial pour le dîner.",
    "Même si les avancées technologiques apportent de nombreux avantages, elles posent également des défis éthiques considérables qu'il nous faut relever.",
]

# Strategies to compare, paired with the label printed for each one.
# The three sampling strategies set do_sample=True, so their output can
# differ from one run to the next.
strategy_labels = [
    ("beam_search", "Beam Search"),
    ("top_k_sampling", "Top-k Sampling"),
    ("top_p_sampling", "Top-p Sampling"),
    ("temperature_sampling", "Temperature Sampling"),
]

# Translate each sentence with different strategies
for sentence in sentences:
    print(f"French: {sentence}")
    for strategy, label in strategy_labels:
        translated_sentence = translate(sentence, strategy=strategy)
        print(f"English ({label}): {translated_sentence}\n")
```