|
import streamlit as st |
|
from transformers import pipeline |
|
import streamlit as st |
|
import json |
|
import torch |
|
from torch.nn import functional as F |
|
from transformers import AutoTokenizer, AutoModelForSequenceClassification |
|
|
|
@st.cache_resource
def load_dicts():
    """Load the label <-> index mappings from disk.

    Returns:
        tuple[dict, dict]: ``(label2ind, ind2label)``. Note that JSON object
        keys are always strings, so ``ind2label`` is keyed by *string* indices.
    Cached with ``st.cache_resource`` so the files are read only once per app.
    """
    # Explicit encoding: JSON is UTF-8 by spec; don't rely on the platform default.
    with open("label2ind.json", "r", encoding="utf-8") as file:
        label2ind = json.load(file)
    with open("ind2label.json", "r", encoding="utf-8") as file:
        ind2label = json.load(file)
    return label2ind, ind2label
|
|
|
@st.cache_resource
def load_model():
    """Load the tokenizer and the fine-tuned classification model.

    Returns:
        tuple: ``(tokenizer, model)``.
    Cached with ``st.cache_resource`` so the weights are loaded only once.
    """
    # Fetch the label mapping via the cached loader instead of reading the
    # module-level global `label2ind`, which is only bound *after* this
    # function is defined — calling the cached function is safe and cheap.
    label2ind, _ = load_dicts()
    tokenizer = AutoTokenizer.from_pretrained(
        "microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract"
    )
    model = AutoModelForSequenceClassification.from_pretrained(
        "my_model/checkpoint-23000",
        num_labels=len(label2ind),
        problem_type="single_label_classification",
    )
    return tokenizer, model
|
|
|
# Load the label mappings and the fine-tuned classifier once at startup;
# both loaders are cached with @st.cache_resource, so reruns are cheap.
label2ind, ind2label = load_dicts()
tokenizer, model = load_model()

# Free-text inputs for the paper title and abstract to classify.
title = st.text_input("Title", value="Math")
abstract = st.text_input("Abstract", value="Random variable")
|
|
|
def get_logits(title, abstract):
    """Run the classifier on the concatenated title and abstract.

    Args:
        title: Paper title text.
        abstract: Paper abstract text.
    Returns:
        torch.Tensor: Raw logits of shape ``(1, num_labels)``.
    """
    text = title + "###" + abstract
    # Pass ALL tokenizer outputs (input_ids AND attention_mask) to the model
    # instead of only input_ids, so attention masking is applied correctly.
    inputs = tokenizer(text, return_tensors="pt")
    # Inference only — skip autograd bookkeeping.
    with torch.no_grad():
        logits = model(**inputs)["logits"]
    return logits
|
|
|
def get_ans(logits):
    """Write the top predicted labels until cumulative probability >= 95%.

    Args:
        logits: Raw model logits of shape ``(1, num_labels)``.
    """
    # Explicit dim avoids the PyTorch deprecation warning for implicit dim.
    probs = F.softmax(logits, dim=1)
    # logits is (1, C): take row 0 so `order` is a 1-D tensor of label indices
    # sorted by descending score (the original indexed rows, not elements).
    order = torch.argsort(logits, dim=1, descending=True)[0]
    cum_sum = 0.0
    i = 0
    # Guard on numel() so float round-off can never loop past the last label.
    while cum_sum < 0.95 and i < order.numel():
        idx = order[i].item()
        prob = probs[0][idx].item()
        cum_sum += prob
        # ind2label comes from JSON, whose object keys are strings — look up
        # by str(idx), not by the raw tensor (which would raise).
        st.write(f"label {ind2label[str(idx)]} with probability {prob * 100}%")
        i += 1
|
|
|
# Classify as soon as the user has entered a title or an abstract.
# The original referenced undefined names (`query`, `result`, `text`) and
# never called get_ans, so no predictions were ever shown.
if title or abstract:
    logits = get_logits(title, abstract)
    get_ans(logits)
|
|