Spaces:
Runtime error
Runtime error
import functools

import googlesearch
import gradio as gr
import torch
from transformers import (
    AutoModelForSeq2SeqLM,
    AutoModelForTokenClassification,
    AutoTokenizer,
)
@functools.lru_cache(maxsize=1)
def _load_summarizer():
    """Load the summarization tokenizer and model once and reuse them."""
    checkpoint = "sshleifer/distilbart-cnn-12-6"
    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)
    return tokenizer, model


def summarize(text: str) -> str:
    """Return a summary of *text* generated by the distilbart-cnn checkpoint.

    Args:
        text: The article to summarize. It is truncated by the tokenizer
            (``truncation=True``) before being passed to the model.

    Returns:
        The decoded summary with special tokens removed, or an empty string
        when *text* is empty or only whitespace.
    """
    # Nothing to summarize: skip model loading and generation entirely.
    if not text or not text.strip():
        return ""

    # Loading from the checkpoint is expensive, so it is cached across calls
    # instead of being repeated for every request.
    tokenizer, model = _load_summarizer()
    inputs = tokenizer(text, truncation=True, return_tensors="pt").input_ids
    outputs = model.generate(inputs, max_new_tokens=100, do_sample=False)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)
@functools.lru_cache(maxsize=1)
def _load_question_generator():
    """Load the question-generation tokenizer and model once and reuse them."""
    checkpoint = "mrm8488/t5-base-finetuned-question-generation-ap"
    tokenizer = AutoTokenizer.from_pretrained(checkpoint, use_fast=False)
    model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)
    return tokenizer, model


def generate_question(text: str) -> str:
    """Generate a question about *text* with the T5 question-generation model.

    The same text is supplied as both the ``answer`` and the ``context`` part
    of the prompt.

    Args:
        text: The passage to generate a question about. The prompt built from
            it is truncated by the tokenizer (``truncation=True``).

    Returns:
        The generated question with the leading ``"question: "`` marker
        removed. If the model output does not contain that marker, the whole
        decoded output is returned. An empty string is returned when *text*
        is empty or only whitespace.
    """
    # Nothing to ask about: skip model loading and generation entirely.
    if not text or not text.strip():
        return ""

    # Loading from the checkpoint is expensive, so it is cached across calls.
    tokenizer, model = _load_question_generator()
    prompt = f"answer: {text} context: {text}"
    inputs = tokenizer(prompt, truncation=True, return_tensors="pt").input_ids
    outputs = model.generate(inputs)
    generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)

    # partition() never raises, unlike two-target unpacking of split(), which
    # fails when the marker is missing or occurs more than once.
    _, marker, question = generated_text.partition("question: ")
    if not marker:
        return generated_text.strip()
    return question
@functools.lru_cache(maxsize=1)
def _load_keyword_extractor():
    """Load the keyword-extraction tokenizer and model once and reuse them."""
    checkpoint = "yanekyuk/bert-keyword-extractor"
    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    model = AutoModelForTokenClassification.from_pretrained(checkpoint)
    return tokenizer, model


def _extract_keywords(text):
    """Return the distinct keywords found in *text*, in first-seen order."""
    tokenizer, model = _load_keyword_extractor()
    inputs = tokenizer(text, truncation=True, return_tensors="pt")
    # Inference only: no gradients are needed for the forward pass.
    with torch.no_grad():
        logits = model(**inputs).logits[0]

    token_ids = inputs.input_ids[0].tolist()
    token_types = logits.argmax(dim=-1).tolist()

    # Consecutive tokens whose predicted class is non-zero form one keyword.
    keyword_tokens = []
    current_keyword_tokens = []
    for token_id, token_type in zip(token_ids, token_types):
        if token_type > 0:
            current_keyword_tokens.append(token_id)
        elif current_keyword_tokens:
            keyword_tokens.append(current_keyword_tokens)
            current_keyword_tokens = []
    # Flush a keyword that is still open when the sequence ends.
    if current_keyword_tokens:
        keyword_tokens.append(current_keyword_tokens)

    keywords = tokenizer.batch_decode(keyword_tokens)
    # dict.fromkeys de-duplicates while keeping first-seen order, so the
    # keywords picked for searching are the same on every run (a set is not
    # ordered, which made the selection arbitrary).
    return list(dict.fromkeys(keywords))


def get_similar_articles(text: str) -> str:
    """Search the web for pages related to the keywords found in *text*.

    Keywords are extracted with the ``yanekyuk/bert-keyword-extractor`` token
    classifier; the first three distinct keywords are each passed to
    ``googlesearch.search`` (``num=3``, ``stop=3``).

    Args:
        text: The article to find related pages for. It is truncated by the
            tokenizer (``truncation=True``).

    Returns:
        The distinct result URLs joined with newlines, or an empty string
        when *text* is empty/whitespace or no results were found.
    """
    # Nothing to analyse: skip model loading and web requests entirely.
    if not text or not text.strip():
        return ""

    similar_websites = []
    for keyword in _extract_keywords(text)[:3]:
        websites = googlesearch.search(
            keyword,
            tld="com",
            lang="en",
            num=3,
            stop=3,
            pause=0.5,
        )
        similar_websites.extend(websites)
    # Different keywords can yield the same page; list each URL only once.
    return "\n".join(dict.fromkeys(similar_websites))
# One plain text-in / text-out interface per model-backed function.
summarize_interface = gr.Interface(
    fn=summarize,
    inputs="text",
    outputs="text",
)
question_interface = gr.Interface(
    fn=generate_question,
    inputs="text",
    outputs="text",
)
similar_articles_interface = gr.Interface(
    fn=get_similar_articles,
    inputs="text",
    outputs="text",
)

# Group the three interfaces into a single tabbed app and start serving it.
tabs = gr.TabbedInterface(
    interface_list=[
        summarize_interface,
        question_interface,
        similar_articles_interface,
    ],
    tab_names=[
        "Summarize an article",
        "Generate a question",
        "Get similar articles",
    ],
)
tabs.launch()