"""Streamlit app that scores a patent abstract with a DistilBERT classifier.

On import the script extracts ``model_new.zip``, loads the classifier from
``./model_new/clf``, loads the tokenizer named in ``./model_new/info.pkl``
and reads the patent table from ``milestone4.csv``.
"""

import pickle
import shutil
import zipfile

import numpy
import pandas as pd
import streamlit as st
import tensorflow as tf
from tensorflow.keras import activations
from transformers import DistilBertTokenizer, TFDistilBertForSequenceClassification

# Extract the archive's contents to the current working directory.
with zipfile.ZipFile("model_new.zip", "r") as zip_ref:
    zip_ref.extractall(".")

# Load the model and tokenizer.
new_model = TFDistilBertForSequenceClassification.from_pretrained('./model_new/clf')

# SECURITY NOTE: pickle.load executes arbitrary code from the file it reads;
# only ship an info.pkl that comes from a trusted source.
# The context manager guarantees the file handle is closed after loading.
with open('./model_new/info.pkl', 'rb') as info_file:
    model_name, max_len = pickle.load(info_file)
# Previously hard-coded alternatives, kept for reference:
# model_name='distilbert-base-uncased'
# max_len=512
tokenizer = DistilBertTokenizer.from_pretrained(model_name)

df = pd.read_csv('milestone4.csv')


def predict_proba(text):
    """Return the softmax probability of class index 0 for ``text``.

    The text is tokenized (truncated to ``max_len``), wrapped in a
    single-element TensorFlow dataset, and passed through ``new_model``.
    The logits are normalized with a softmax over the class axis.

    Args:
        text: The string to classify.

    Returns:
        The first entry of the softmax output for the single input row.
    """
    encodings = tokenizer([text], max_length=max_len, truncation=True, padding=True)
    tfdataset = tf.data.Dataset.from_tensor_slices(dict(encodings)).batch(1)
    logits = new_model.predict(tfdataset).logits
    probabilities = tf.nn.softmax(logits, axis=1).numpy()
    # Single input row, first class column.
    return probabilities[0][0]


def app():
    """Render the page: pick a patent, show its text, and score it on demand."""
    # Drop-down menu to select the patent number.
    selected_patent = st.selectbox("Select a patent number", df['Patent Number'])
    # Use the first row whose patent number matches the selection.
    selected_index = df[df['Patent Number'] == selected_patent].index[0]

    # Display the abstract and claims of the selected patent.
    abstract_c = df.loc[selected_index, 'Abstract']
    st.subheader('Abstract:')
    st.write(abstract_c)

    st.subheader('Claims:')
    st.write(df.loc[selected_index, 'Claims'])

    # Submit button for the user to generate a prediction.
    if st.button('Submit'):
        # NOTE(review): the message below talks about the claims, but the
        # model is fed the abstract -- confirm which field the classifier
        # was trained on.
        proba = predict_proba(abstract_c)

        # Display the prediction.
        st.subheader('Prediction')
        st.write(f'The probability of the claims being accepted is {proba:.5f}.')


if __name__ == '__main__':
    app()