Spaces:

darthPanda
/

SentimentAnalysisTool

Runtime error

App Files Files Community

darthPanda committed on Mar 1, 2023

Commit

e0ed1f1

1 Parent(s): b2b6846

hf

Browse files

Files changed (2) hide show

app.py +41 -22
requirements.txt +1 -1

app.py CHANGED Viewed

@@ -37,12 +37,20 @@ from transformers import pipeline
 #@st.cache_resource()
 @st.cache(allow_output_mutation=True)
-def get_model():
     tokenizer = AutoTokenizer.from_pretrained("ProsusAI/finbert")
     model = AutoModelForSequenceClassification.from_pretrained("ProsusAI/finbert")
     return tokenizer,model
-tokenizer,model = get_model()
 def extract_text_from_pdf(path):
   text=''
@@ -69,6 +77,9 @@ def download_html():
         st.download_button(label="Download Report", data=html, file_name=file_name, mime=mime_type)
         st.stop()
 st.write("""
 # Sentiment Analysis Tool
 """)
@@ -76,22 +87,29 @@ st.write("""
 #uploaded_file = st.file_uploader("Choose a PDF file", accept_multiple_files=False, type=['pdf'])
 uploaded_file = st.file_uploader("Choose a PDF file", accept_multiple_files=True, type=['pdf'])
 #if uploaded_file is not None:
-if len(uploaded_file)>0:
     import time
     # Wait for 5 seconds
     time.sleep(5)
-    #print('gone')
     pdf_reader = PyPDF2.PdfReader(uploaded_file[0])
-    # Get the number of pages in the PDF file
     num_pages = len(pdf_reader.pages)
     if num_pages > 20:
         st.error("Pages in PDF file should be less than 20.")
     # Check that only one file was uploaded
     #elif isinstance(uploaded_file, list):
     elif len(uploaded_file) > 1:
         st.error("Please upload only one PDF file at a time.")
     else:
         #uploaded_file = uploaded_file[0]
         # Check that the file is a PDF
@@ -132,14 +150,23 @@ if len(uploaded_file)>0:
             with st.spinner('Processing please wait...'):
                 pipe = pipeline(model="ProsusAI/finbert")
                 classifier = pipeline(model="ProsusAI/finbert")
                 output = classifier(useful_sentence)
                 df = pd.DataFrame.from_dict(output)
                 df['Sentence']= pd.Series(useful_sentence)
             labels = ['neutral', 'positive', 'negative']
             values = df.label.value_counts().to_list()
@@ -178,6 +205,8 @@ if len(uploaded_file)>0:
             df_temp = pd.concat([df_temp, pos_df])
             fig = make_subplots(
                 rows=26, cols=6,
                 specs=[ [None, None, None, None, None, None],
@@ -279,31 +308,21 @@ if len(uploaded_file)>0:
             # Add HTML tags to force line breaks in the title text
             wrapped_title = "<br>".join(wrapped_title.split("\n"))
-            fig.update_layout(height=700, showlegend=False, title={'text': f"<b>{wrapped_title} - Sentiment Analysis Report</b>", 'x': 0.5, 'xanchor': 'center','font': {'size': 32}})
             #pyo.plot(fig, filename='report.html')
             buffer = io.StringIO()
             fig.write_html(buffer, include_plotlyjs='cdn')
             html_bytes = buffer.getvalue().encode()
             st.download_button(
-                label='Download HTML',
                 data=html_bytes,
                 file_name='report.html',
                 mime='text/html'
             )
-            # import base64
-            # # Convert the figure to HTML format
-            # fig_html = pio.to_html(fig, full_html=False)
-            # b64 = base64.b64encode(fig_html.encode()).decode()
-            # # Generate a download link
-            # filename = "figure.html"
-            # href = f'<a href="data:file/html;base64,{b64}" download="{filename}">Download Report</a>'
-            # # Display the link
-            # st.markdown(href, unsafe_allow_html=True)

 #@st.cache_resource()
 @st.cache(allow_output_mutation=True)
+def get_sentiment_model():
     tokenizer = AutoTokenizer.from_pretrained("ProsusAI/finbert")
     model = AutoModelForSequenceClassification.from_pretrained("ProsusAI/finbert")
     return tokenizer,model
+tokenizer_sentiment,model_sentiment = get_sentiment_model()
+@st.cache(allow_output_mutation=True)
+def get_emotion_model():
+    tokenizer = AutoTokenizer.from_pretrained("j-hartmann/emotion-english-distilroberta-base")
+    model = AutoModelForSequenceClassification.from_pretrained("j-hartmann/emotion-english-distilroberta-base")
+    return tokenizer,model
+tokenizer_emotion,model_emotion = get_emotion_model()
 def extract_text_from_pdf(path):
   text=''
         st.download_button(label="Download Report", data=html, file_name=file_name, mime=mime_type)
         st.stop()
+if 'filename_key' not in st.session_state:
+    st.session_state.filename_key = ''
 st.write("""
 # Sentiment Analysis Tool
 """)
 #uploaded_file = st.file_uploader("Choose a PDF file", accept_multiple_files=False, type=['pdf'])
 uploaded_file = st.file_uploader("Choose a PDF file", accept_multiple_files=True, type=['pdf'])
 #if uploaded_file is not None:
+if len(uploaded_file)==0:
+    #print('none')
+    st.session_state.filename_key = ''
+elif len(uploaded_file)>0:
     import time
     # Wait for 5 seconds
     time.sleep(5)
     pdf_reader = PyPDF2.PdfReader(uploaded_file[0])
     num_pages = len(pdf_reader.pages)
+    file_name = uploaded_file[0].name
+    # st.write(st.session_state.filename_key)
+    # print(file_name)
+    # st.write("Filename:", file_name)
     if num_pages > 20:
         st.error("Pages in PDF file should be less than 20.")
     # Check that only one file was uploaded
     #elif isinstance(uploaded_file, list):
     elif len(uploaded_file) > 1:
         st.error("Please upload only one PDF file at a time.")
+    elif st.session_state.filename_key == file_name:
+        st.write("Report downloaded successfully")
     else:
         #uploaded_file = uploaded_file[0]
         # Check that the file is a PDF
             with st.spinner('Processing please wait...'):
+                tokenizer = tokenizer_sentiment
+                model = model_sentiment
                 pipe = pipeline(model="ProsusAI/finbert")
                 classifier = pipeline(model="ProsusAI/finbert")
                 output = classifier(useful_sentence)
+                tokenizer = tokenizer_emotion
+                model = model_emotion
+                classifier = pipeline("text-classification", model="j-hartmann/emotion-english-distilroberta-base", top_k=1)
+                output_emotion = classifier(useful_sentence)
+                #print(output_emotion[0])
                 df = pd.DataFrame.from_dict(output)
                 df['Sentence']= pd.Series(useful_sentence)
+            ############################ 3. Processing ############################
             labels = ['neutral', 'positive', 'negative']
             values = df.label.value_counts().to_list()
             df_temp = pd.concat([df_temp, pos_df])
+            ############################ 4. Plotting ############################
             fig = make_subplots(
                 rows=26, cols=6,
                 specs=[ [None, None, None, None, None, None],
             # Add HTML tags to force line breaks in the title text
             wrapped_title = "<br>".join(wrapped_title.split("\n"))
+            fig.update_layout(height=1500, showlegend=False, title={'text': f"<b>{wrapped_title} - Sentiment Analysis Report</b>", 'x': 0.5, 'xanchor': 'center','font': {'size': 32}})
             #pyo.plot(fig, filename='report.html')
+            ############################## 5. Download Report ##############################
             buffer = io.StringIO()
             fig.write_html(buffer, include_plotlyjs='cdn')
             html_bytes = buffer.getvalue().encode()
             st.download_button(
+                label='Download Report',
                 data=html_bytes,
                 file_name='report.html',
                 mime='text/html'
             )
+            st.session_state.filename_key = file_name

requirements.txt CHANGED Viewed

@@ -1,4 +1,4 @@
-streamlit
 transformers
 torch
 PyPDF2

+streamlit==1.17.0
 transformers
 torch
 PyPDF2