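# Streamlit app: classify a paper by its title and abstract with a
# PubMedBERT model fine-tuned for single-label classification, reporting
# the top labels until 95% cumulative probability is covered.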
import json

import streamlit as st
import torch
from torch.nn import functional as F
from transformers import AutoTokenizer, AutoModelForSequenceClassification

@st.cache_resource
def load_dicts():
    # Label <-> index mappings saved at training time.
    with open("label2ind.json", "r") as file:
        label2ind = json.load(file)
    with open("ind2label.json", "r") as file:
        ind2label = json.load(file)
    return label2ind, ind2label

@st.cache_resource
def load_model(num_labels):
    # The tokenizer comes from the base PubMedBERT checkpoint;
    # the fine-tuned weights are loaded from the local checkpoint.
    tokenizer = AutoTokenizer.from_pretrained(
        "microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract"
    )
    model = AutoModelForSequenceClassification.from_pretrained(
        "my_model/checkpoint-23000",
        num_labels=num_labels,
        problem_type="single_label_classification",
    )
    model.eval()
    return tokenizer, model

label2ind, ind2label = load_dicts()
tokenizer, model = load_model(len(label2ind))

title = st.text_input("Title", value="Math")
abstract = st.text_input("Abstract", value="Random variable")

def get_logits(title, abstract):
    # Join title and abstract with the separator used during training.
    text = title + "###" + abstract
    inputs = tokenizer(text, return_tensors="pt", truncation=True)
    with torch.no_grad():
        logits = model(**inputs).logits
    return logits

def get_ans(logits):
    # Print the most likely labels until their cumulative probability reaches 95%.
    probs = F.softmax(logits, dim=1)[0]
    order = torch.argsort(probs, descending=True)
    cum_sum = 0.0
    for ind in order:
        p = probs[ind].item()
        label = ind2label[str(ind.item())]  # JSON object keys are strings
        st.write(f"label {label} with probability {p * 100:.2f}%")
        cum_sum += p
        if cum_sum >= 0.95:
            break

if title or abstract:
    logits = get_logits(title, abstract)
    get_ans(logits)