import streamlit as st
from transformers import pipeline
import streamlit as st
import json
import torch
from torch.nn import functional as F
from transformers import AutoTokenizer, AutoModelForSequenceClassification

@st.cache_resource
def load_dicts():
    """Load the label<->index mapping dictionaries from disk (cached once per session)."""
    with open("label2ind.json", "r") as fp:
        name_to_index = json.load(fp)
    with open("ind2label.json", "r") as fp:
        index_to_name = json.load(fp)
    return name_to_index, index_to_name

@st.cache_resource
def load_model(num_labels=None):
    """Load the PubMedBERT tokenizer and the fine-tuned classifier (cached).

    Parameters
    ----------
    num_labels : int | None
        Number of target classes. Defaults to ``len(label2ind)`` from the
        module-level mapping, preserving the original behavior for existing
        ``load_model()`` callers.
    """
    if num_labels is None:
        # NOTE(review): the original read the module global `label2ind`,
        # which exists only because load_dicts() happens to run first at
        # import time — keep that fallback but make the dependency explicit
        # and overridable.
        num_labels = len(label2ind)
    tokenizer = AutoTokenizer.from_pretrained(
        "microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract"
    )
    model = AutoModelForSequenceClassification.from_pretrained(
        "my_model/checkpoint-23000",
        num_labels=num_labels,
        problem_type="single_label_classification",
    )
    return tokenizer, model

# Build the label maps and model once at app start-up; the order matters
# because load_model() falls back on the global `label2ind` created here.
label2ind, ind2label = load_dicts()
tokenizer, model = load_model()

# Streamlit re-runs the whole script on every interaction; these widgets
# return the current contents of the two text boxes.
title = st.text_input("Title", value="Math")
abstract = st.text_input("Abstract", value="Random variable")

def get_logits(title, abstract):
    """Return raw classifier logits of shape (1, num_labels) for one paper.

    Title and abstract are joined with the "###" separator the model was
    fine-tuned on (per the original implementation).
    """
    text = title + "###" + abstract
    # truncation=True guards against inputs longer than the model's
    # maximum sequence length, which would otherwise raise at inference.
    encoding = tokenizer(text, return_tensors="pt", truncation=True)
    with torch.no_grad():  # inference only — no autograd graph needed
        # Pass the full encoding so the attention_mask is honored; the
        # original forwarded only input_ids and silently dropped the mask.
        logits = model(**encoding)["logits"]
    return logits

def get_ans(logits):
    """Write the smallest label set covering >= 95% probability mass to the page.

    Labels are reported in descending-probability order until their
    cumulative probability reaches 0.95.

    Parameters
    ----------
    logits : torch.Tensor
        Raw classifier output of shape (1, num_labels).
    """
    # Original bugs fixed here:
    #  - F.softmax was called without dim (deprecation warning / ambiguity);
    #  - ind[i] indexed the *rows* of a (1, N) argsort result instead of
    #    the sorted column positions (should have been ind[0][i]);
    #  - ind2label comes from json.load, so its keys are strings — a raw
    #    tensor index can never match;
    #  - the probability was printed as a tensor repr rather than a float.
    probs = F.softmax(logits, dim=1)[0]  # (num_labels,)
    order = torch.argsort(probs, descending=True)
    cum_sum = 0.0
    for idx in order.tolist():
        if cum_sum >= 0.95:
            break
        p = probs[idx].item()
        cum_sum += p
        # JSON object keys are always strings, hence str(idx).
        st.write(f"label {ind2label[str(idx)]} with probability {p * 100}%")

if title or abstract:
    # Echo the inputs being classified (the original wrote the undefined
    # names `query` and `result`, raising NameError on every run), then
    # run the model and report the top labels. `get_logits` was also
    # called with an undefined `text`, and `get_ans` was never called.
    st.write(f"Title: {title}")
    st.write(f"Abstract: {abstract}")
    logits = get_logits(title, abstract)
    get_ans(logits)