Spaces: Sleeping
| import gradio as gr | |
| import re | |
| from transformers import ( | |
| AutoTokenizer, | |
| AutoModelForSeq2SeqLM, | |
| ) | |
def clean_text(text):
    """Normalise pasted article text before it is handed to the tokenizer.

    Steps, in order:
      1. Drop every non-ASCII character (e.g. Chinese characters).
      2. Replace newlines, tabs and the literal markers ``ADVERTISEMENT`` /
         ``ADVERTISING`` with a single space each.
      3. Collapse runs of spaces into one and trim surrounding whitespace.

    Args:
        text: Raw text as a ``str``.

    Returns:
        The cleaned, single-line ASCII string.
    """
    cleaned = text.encode("ascii", errors="ignore").decode("ascii")

    # Each token becomes one space so that the words on either side of it
    # stay separated; surplus spaces are squeezed out below.
    for token in ("\n", "\t", "ADVERTISEMENT", "ADVERTISING"):
        cleaned = cleaned.replace(token, " ")

    squeezed = re.sub(" +", " ", cleaned)
    return squeezed.strip()
# Maps each dropdown label to the Hugging Face checkpoint used for it.
_HEADLINE_MODELS = {
    "Singapore News": "chinhon/pegasus-newsroom-headline_writer_oct22",
    "International News": "chinhon/pegasus-newsroom_wires_hdwriter42k",
    "Commentary": "chinhon/bart-large-commentaries_hdwriter",
    "News in Malay": "chinhon/pegasus-newsroom-malay_headlines",
}
# Checkpoint used when the selection is missing or not one of the labels above.
_DEFAULT_HEADLINE_MODEL = "chinhon/pegasus-newsroom-headline_writer_oct22"

# Recently used (tokenizer, model) pairs keyed by checkpoint name, oldest first.
# Kept small on purpose: each entry holds a full seq2seq model in memory.
_MAX_CACHED_MODELS = 2
_MODEL_CACHE = {}


def _load_headline_model(model_name):
    """Return ``(tokenizer, model)`` for *model_name*, reusing a cached pair."""
    pair = _MODEL_CACHE.pop(model_name, None)
    if pair is None:
        pair = (
            AutoTokenizer.from_pretrained(model_name),
            AutoModelForSeq2SeqLM.from_pretrained(model_name),
        )
        # Make room by evicting the least recently used entries.
        while len(_MODEL_CACHE) >= _MAX_CACHED_MODELS:
            _MODEL_CACHE.pop(next(iter(_MODEL_CACHE)))
    # (Re-)insert last so dict order tracks recency of use.
    _MODEL_CACHE[model_name] = pair
    return pair


def newsroom_hd(hdchoice, text):
    """Generate a headline for *text* using the model selected by *hdchoice*.

    Args:
        hdchoice: One of the labels in ``_HEADLINE_MODELS``; any other value
            falls back to the default (Singapore News) checkpoint.
        text: Article text as pasted by the user. ``None`` is treated as
            empty.

    Returns:
        The generated headline, or an empty string when nothing is left of
        the input after cleaning.
    """
    model_name = _HEADLINE_MODELS.get(hdchoice, _DEFAULT_HEADLINE_MODEL)

    input_text = clean_text(text or "")
    if not input_text:
        # Nothing to summarise; skip loading and running the model.
        return ""

    tokenizer, model = _load_headline_model(model_name)

    # The article is the *source* sequence, so it is encoded with a plain
    # tokenizer call. ``as_target_tokenizer()`` is intended for encoding
    # labels and is deprecated in recent transformers releases.
    batch = tokenizer(
        input_text, truncation=True, padding="longest", return_tensors="pt"
    )
    generated = model.generate(**batch)
    headlines = tokenizer.batch_decode(generated, skip_special_tokens=True)
    return headlines[0]
# --- Gradio front end -------------------------------------------------------
# Static copy and choices are named up front so the Interface call below reads
# as pure wiring.
_UI_TITLE = "Generate Newsroom Headlines With AI"
_UI_DESCRIPTION = "**How to use**: Select the type of headline you wish to generate, paste in a relevant amount of text, and click submit."
_UI_ARTICLE = "**Note**: Paste in as much text as you think necessary, though there's an automatic cut-off of about 500 words for some models and about 850 words for others. If you copy-and-paste directly from a website, take note to remove unrelated text such as those for advertisements and recommended links."

# Headline categories offered in the dropdown; the first one is preselected.
_UI_HEADLINE_CHOICES = [
    "Singapore News",
    "International News",
    "Commentary",
    "News in Malay",
]

_headline_type_input = gr.Dropdown(
    label="Select the type of headlines you would like to generate",
    choices=_UI_HEADLINE_CHOICES,
    value=_UI_HEADLINE_CHOICES[0],
)
_article_text_input = gr.Textbox(label="Paste text here")
_headline_output = gr.Textbox(label="Suggested Headline")

# Inputs are passed to newsroom_hd positionally: (headline type, article text).
gradio_ui = gr.Interface(
    fn=newsroom_hd,
    title=_UI_TITLE,
    description=_UI_DESCRIPTION,
    article=_UI_ARTICLE,
    inputs=[_headline_type_input, _article_text_input],
    outputs=_headline_output,
)

# Enable request queuing, then start the app.
gradio_ui.queue().launch()