AndrewLam489 committed on
Commit
92c3a1e
·
verified ·
1 Parent(s): 3f07df6

Update app.py

Files changed (1)
  app.py +8 -8
app.py CHANGED
@@ -1,33 +1,33 @@
  import streamlit as st
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

- # Ensure the model and tokenizer are available
- tokenizer = AutoTokenizer.from_pretrained("facebook/mbart-large-50", use_fast=True)
+ # Load model and tokenizer
+ tokenizer = AutoTokenizer.from_pretrained("facebook/mbart-large-50")
  model = AutoModelForSeq2SeqLM.from_pretrained("facebook/mbart-large-50")

  def translate_to_japanese(english_text):
-     # Translation prompt
-     input_text = f"translate English to Japanese: {english_text}"
+     # Add language tokens for translation (from English > to Japanese)
+     input_text = f">en< {english_text} </s>"

      # Tokenize input text
      inputs = tokenizer(input_text, return_tensors="pt", padding=True, truncation=True)

-     # Generate translation
+     # Generate translation (the model's output should be a token sequence for the translation)
      outputs = model.generate(inputs['input_ids'], max_length=128, num_beams=4, early_stopping=True)

      # Decode the generated token IDs into a string
      translated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
-
+
      return translated_text

  # Streamlit UI
  st.title("English to Japanese Translator")
  st.write("This app translates English sentences to Japanese using the mBART model.")

- # User input
+ # Create an input box for the user to enter an English sentence
  english_sentence = st.text_area("Enter English sentence:", "")

- # Translate and display result
+ # Translate and display the result
  if english_sentence:
      with st.spinner("Translating..."):
          translation = translate_to_japanese(english_sentence)
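
For comparison, the Transformers documentation drives mBART-50 translation with language codes set on the tokenizer and generator rather than an inline prompt string. The sketch below is an illustration of that documented pattern under two assumptions: it swaps in the fine-tuned facebook/mbart-large-50-many-to-many-mmt checkpoint (not the base checkpoint this commit uses) and the MBart50TokenizerFast class.

from transformers import MBart50TokenizerFast, MBartForConditionalGeneration

# Assumption for illustration: the many-to-many translation checkpoint, not the base model
MODEL_NAME = "facebook/mbart-large-50-many-to-many-mmt"

# Source language is declared on the tokenizer instead of being written into the prompt
tokenizer = MBart50TokenizerFast.from_pretrained(MODEL_NAME, src_lang="en_XX")
model = MBartForConditionalGeneration.from_pretrained(MODEL_NAME)

def translate_to_japanese(english_text):
    # Tokenize the raw English text; no manual </s> or language markers are added
    inputs = tokenizer(english_text, return_tensors="pt")
    # Force the decoder to start with the Japanese language token
    outputs = model.generate(
        **inputs,
        forced_bos_token_id=tokenizer.lang_code_to_id["ja_XX"],
        max_length=128,
        num_beams=4,
        early_stopping=True,
    )
    return tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]

In this pattern, forced_bos_token_id selects the target language at decode time, so the function body stays the same for any language pair supported by the checkpoint.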