Spaces:
Runtime error
Runtime error
| import streamlit as st | |
| import os.path | |
| import pathlib | |
| import pandas as pd | |
| import numpy as np | |
| import PyPDF2 | |
| from PyPDF2 import PdfReader | |
| from os import walk | |
| import nltk | |
| import glob | |
| import plotly.express as px | |
| from wordcloud import WordCloud | |
| import plotly.io as pio | |
| from plotly.subplots import make_subplots | |
| import plotly.graph_objs as go | |
| import pandas as pd | |
| import plotly.offline as pyo | |
| import io | |
| from io import StringIO | |
# NOTE(review): a `@st.cache_resource()` decorator was commented out here in
# the original; it is left disabled because the reason is not visible in this
# file.
def get_nl():
    """Download the NLTK sentence-tokenizer data used by ``sent_tokenize``.

    Both the legacy ``punkt`` package and the newer ``punkt_tab`` package are
    requested: NLTK 3.9+ looks up ``punkt_tab`` when tokenizing, while older
    releases look up ``punkt``.

    Returns:
        Whatever ``nltk.download('punkt')`` returns, as before.
    """
    punkt_result = nltk.download('punkt')
    # Extra download only; its result is deliberately not part of the return
    # value so existing callers see the same thing as before.
    nltk.download('punkt_tab')
    return punkt_result


get_nl()
| from nltk.tokenize import sent_tokenize | |
| from transformers import AutoTokenizer, AutoModelForSequenceClassification | |
| from transformers import pipeline | |
| # if os.path.exists("report.html"): | |
| # os.remove("report.html") | |
# NOTE(review): the original carried a commented-out `@st.cache_resource()`
# decorator on this loader; caching stays disabled here.
def get_sentiment_model():
    """Load the tokenizer and classification model for ``ProsusAI/finbert``.

    Returns:
        A ``(tokenizer, model)`` tuple, in that order.
    """
    checkpoint = "ProsusAI/finbert"
    sentiment_tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    sentiment_model = AutoModelForSequenceClassification.from_pretrained(checkpoint)
    return sentiment_tokenizer, sentiment_model


# Loaded once at module level and referenced by the analysis script below.
tokenizer_sentiment, model_sentiment = get_sentiment_model()
def get_emotion_model():
    """Load the tokenizer and classification model for the emotion checkpoint.

    The checkpoint is ``j-hartmann/emotion-english-distilroberta-base``.

    Returns:
        A ``(tokenizer, model)`` tuple, in that order.
    """
    checkpoint = "j-hartmann/emotion-english-distilroberta-base"
    emotion_tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    emotion_model = AutoModelForSequenceClassification.from_pretrained(checkpoint)
    return emotion_tokenizer, emotion_model


# Loaded once at module level and referenced by the analysis script below.
tokenizer_emotion, model_emotion = get_emotion_model()
def get_intent_model():
    """Build the zero-shot classification pipeline used for intent detection.

    Returns:
        A ``transformers`` pipeline for the ``zero-shot-classification`` task
        backed by ``cross-encoder/nli-deberta-v3-small``.
    """
    return pipeline(
        "zero-shot-classification",
        model='cross-encoder/nli-deberta-v3-small',
    )


# Built once at module level and reused for every sentence.
intent_classifier = get_intent_model()
def extract_text_from_pdf(path):
    """Return the text of every page of a PDF, concatenated in page order.

    Args:
        path: Anything ``PdfReader`` accepts as its source (the original
            passes it straight through).

    Returns:
        A single string with the page texts joined without a separator.
        A page for which ``extract_text()`` yields nothing contributes an
        empty string instead of raising ``TypeError`` on concatenation.
    """
    reader = PdfReader(path)
    # Joining once avoids rebuilding the string for every page.
    return ''.join(page.extract_text() or '' for page in reader.pages)
# Create a button to download the HTML file
def download_html():
    """Offer ``report.html`` from the working directory as a download.

    Reads the file, renders a Streamlit download button for it and then
    stops the current script run with ``st.stop()``.

    Raises:
        FileNotFoundError: if ``report.html`` does not exist.
    """
    file_name = "report.html"
    mime_type = "text/html"
    with st.spinner('Downloading HTML file...'):
        # The context manager closes the file; no explicit close() is needed.
        # Explicit encoding keeps the read independent of the platform
        # default. NOTE(review): assumes the report was written as UTF-8 —
        # confirm against whatever produces report.html.
        with open(file_name, "r", encoding="utf-8") as f:
            html = f.read()
    print('download button')
    st.download_button(label="Download Report", data=html, file_name=file_name, mime=mime_type)
    st.stop()
# Make sure the session always has a 'filename_key' entry; it starts empty.
if 'filename_key' not in st.session_state:
    st.session_state['filename_key'] = ''

# Page heading (rendered as Markdown by st.write).
st.write("""
# Dcoument Analysis Tool
""")

# Multiple files are accepted by the widget, so the result is a sequence.
uploaded_file = st.file_uploader(
    "Choose a PDF file",
    type=['pdf'],
    accept_multiple_files=True,
)
import textwrap

# PDFs with more pages than this are rejected.
_MAX_PDF_PAGES = 20
# Only sentences whose character length lies in this inclusive range are
# analysed. NOTE(review): 510 presumably approximates the models' input
# limit, but it is applied to characters, not tokens — confirm.
_MIN_SENTENCE_CHARS = 50
_MAX_SENTENCE_CHARS = 510
# Intent predictions must score strictly above this to be reported.
_MIN_INTENT_SCORE = 0.5


def _extract_pdf_text(pdf_reader):
    """Concatenate the text of every page of an already-open PDF reader."""
    return ''.join(page.extract_text() or '' for page in pdf_reader.pages)


def _useful_sentences(sentences):
    """Keep the sentences that are neither too short nor too long."""
    return [
        sentence for sentence in sentences
        if _MIN_SENTENCE_CHARS <= len(sentence) <= _MAX_SENTENCE_CHARS
    ]


def _first_prediction(result):
    """Unwrap a pipeline result down to its first prediction dict.

    Text-classification pipelines return the prediction wrapped in one or two
    list levels depending on arguments and library version; peeling lists
    until a non-list remains handles both shapes.
    """
    while isinstance(result, (list, tuple)):
        result = result[0]
    return result


def _classify_with_progress(sentences, classify_one):
    """Run ``classify_one`` on each sentence while driving a progress bar."""
    progress_bar = st.progress(0)
    total = len(sentences)
    predictions = []
    for done, sentence in enumerate(sentences, start=1):
        predictions.append(classify_one(sentence))
        progress_bar.progress(int(done / total * 100))
    progress_bar.empty()
    return predictions


def _run_models(sentences):
    """Classify ``sentences`` for sentiment, emotion and intent.

    Returns:
        Three DataFrames (sentiment, emotion, intent), each with the columns
        ``label``, ``score`` and ``Sentence``.
    """
    status = st.empty()

    # Reuse the models/tokenizers loaded at module level instead of building
    # fresh pipelines from the hub on every run.
    status.text('Performing Sentiment Analysis...')
    sentiment_pipe = pipeline(
        "text-classification",
        model=model_sentiment,
        tokenizer=tokenizer_sentiment,
    )
    df_sentiment = pd.DataFrame(_classify_with_progress(
        sentences, lambda s: _first_prediction(sentiment_pipe(s))))
    df_sentiment['Sentence'] = sentences

    status.text('Performing Emotion Analysis...')
    emotion_pipe = pipeline(
        "text-classification",
        model=model_emotion,
        tokenizer=tokenizer_emotion,
        top_k=1,
    )
    df_emotion = pd.DataFrame(_classify_with_progress(
        sentences, lambda s: _first_prediction(emotion_pipe(s))))
    df_emotion['Sentence'] = sentences

    status.text('Performing Intent Analysis...')
    candidate_labels = ['complaint', 'suggestion', 'query']

    def top_intent(sentence):
        # The zero-shot result lists labels/scores in parallel; the code
        # relies on index 0 being the best match.
        prediction = intent_classifier(sentence, candidate_labels)
        return {'label': prediction['labels'][0], 'score': prediction['scores'][0]}

    df_intent = pd.DataFrame(_classify_with_progress(sentences, top_intent))
    df_intent['Sentence'] = sentences

    status.text('Processing Completed')
    return df_sentiment, df_emotion, df_intent


def _label_table(frame, label, sentence_header, empty_message, min_score=None):
    """Build the display table for one predicted label.

    Args:
        frame: DataFrame with ``label``, ``score`` and ``Sentence`` columns.
        label: Label value to select.
        sentence_header: Column title to show instead of ``Sentence``.
        empty_message: Text of the single placeholder row used when no row
            qualifies.
        min_score: If given, keep only rows whose rounded score is strictly
            greater than this value.

    Returns:
        ``(table, count, scores)`` where ``table`` is sorted by descending
        score (rounded to 4 decimals), ``count`` is the number of real rows
        and ``scores`` holds their scores. The placeholder row is never
        counted and never contributes a score.
    """
    table = frame.loc[frame['label'] == label, ['score', 'Sentence']]
    table = table.sort_values('score', ascending=False)
    table['score'] = table['score'].round(4)
    table = table.rename(columns={'Sentence': sentence_header})
    if min_score is not None:
        table = table[table['score'] > min_score].copy()
    scores = table['score'].tolist()
    if not scores:
        table.loc[0] = [0.0, empty_message]
    return table, len(scores), scores


def _net_score(positive_scores, negative_scores):
    """Average positive scores together with negated negative scores.

    Returns 0.0 when there are no scores at all.
    """
    signed = [-score for score in negative_scores] + list(positive_scores)
    return sum(signed) / len(signed) if signed else 0.0


def _word_cloud_image(text):
    """Render a word cloud image for ``text``."""
    # NOTE(review): these are removed as plain substrings, in order. The
    # first entry "s" therefore strips every letter "s", which is why later
    # entries are spelled without it ("quetion", "cutomer", ...). Kept as-is
    # to preserve the rendered cloud; WordCloud's `stopwords` argument would
    # be the cleaner tool.
    words_to_remove = ["s", "quarter", "thank", "million", "Thank", "quetion", 'wa', 'rate', 'firt',
                       "customer", "business", "last year", "year", 'lat', 'well', 'jut', 'thi', 'cutomer',
                       "will", "think", "higher", "question", "going"]
    for word in words_to_remove:
        text = text.replace(word, "")
    cloud = WordCloud(background_color='white', width=800, height=400).generate(text)
    return cloud.to_image()


def _report_specs(rows=62, cols=6):
    """Return the ``make_subplots`` spec grid for the report layout.

    Every cell is ``None`` except the anchor cells listed below, given as
    1-based ``(row, col)`` positions matching the ``add_trace`` calls.
    """
    def cell(kind, rowspan, colspan):
        return {"type": kind, "rowspan": rowspan, "colspan": colspan}

    placements = [
        (4, 3, cell("indicator", 3, 2)),    # sentiment headline
        (6, 1, cell("pie", 6, 2)),
        (6, 3, cell("indicator", 6, 2)),
        (6, 5, cell("indicator", 6, 2)),
        (13, 1, cell("image", 5, 3)),
        (13, 4, cell("table", 5, 3)),
        (18, 1, cell("table", 5, 3)),
        (18, 4, cell("table", 5, 3)),
        (26, 3, cell("indicator", 3, 2)),   # emotion headline
        (29, 1, cell("bar", 6, 6)),
        (36, 1, cell("table", 2, 3)),
        (36, 4, cell("table", 2, 3)),
        (39, 1, cell("table", 2, 3)),
        (39, 4, cell("table", 2, 3)),
        (44, 3, cell("indicator", 3, 2)),   # intent headline
        (47, 2, cell("indicator", 2, 5)),   # first bullet
        (50, 2, cell("indicator", 2, 5)),   # second bullet
        (53, 2, cell("indicator", 2, 5)),   # third bullet
        (56, 1, cell("table", 4, 2)),
        (56, 3, cell("table", 4, 2)),
        (56, 5, cell("table", 4, 2)),
    ]
    specs = [[None] * cols for _ in range(rows)]
    for row, col, spec in placements:
        specs[row - 1][col - 1] = spec
    return specs


def _table_trace(table):
    """Wrap a two-column DataFrame in a plotly table trace."""
    return go.Table(
        header=dict(values=list(table.columns), fill_color='lightgray', align='left'),
        cells=dict(values=[table[name] for name in table.columns], fill_color='white', align='left'),
        columnwidth=[1, 4],
    )


def _headline_indicator(value, heading, subheading, suffix):
    """Build the large number indicator that opens a report section."""
    return go.Indicator(
        mode="number",
        value=value,
        number={"suffix": suffix},
        title={"text": f"<span style='font-size:1.5em'>{heading}</span><br>"
                       f"<span style='font-size:0.8em;color:gray'>{subheading}</span>"},
    )


def _bullet_indicator(value, axis_max, label, bar_color=None):
    """Build a bullet gauge; ``bar_color`` is only set when provided."""
    gauge = {'shape': "bullet", 'axis': {'range': [None, axis_max]}}
    if bar_color is not None:
        gauge['bar'] = {'color': bar_color}
    return go.Indicator(mode="number+gauge", gauge=gauge, value=value, title={'text': label})


def _build_figure(df, df_emotion, df_intent, text, title):
    """Assemble the full plotly report figure.

    Args:
        df: Sentiment predictions (``label``, ``score``, ``Sentence``).
        df_emotion: Emotion predictions with the same columns.
        df_intent: Intent predictions with the same columns.
        text: Cleaned document text, used for the word cloud.
        title: Text used as the report title (truncated and wrapped here).
    """
    # ---- 3.1 Sentiment tables and scores ----
    labels = ['neutral', 'positive', 'negative']
    values = [int((df['label'] == name).sum()) for name in labels]
    pos_df, _, pos_scores = _label_table(
        df, 'positive', 'Positive Sentences',
        '-------No positive sentences found in report-------')
    neg_df, _, neg_scores = _label_table(
        df, 'negative', 'Negative Sentences',
        '-------No negative sentences found in report-------')
    neu_df, _, _ = _label_table(
        df, 'neutral', 'Neutral Sentences',
        '-------No neutral sentences found in report-------')
    # Unrounded per-label means feed the gauge threshold (NaN when a label
    # has no rows, as before).
    pos_df_mean = df.loc[df['label'] == 'positive', 'score'].mean()
    neg_df_mean = df.loc[df['label'] == 'negative', 'score'].mean()
    # Computed from score lists so the displayed tables are not mutated and
    # placeholder rows do not dilute the average.
    mean = _net_score(pos_scores, neg_scores)

    # ---- 3.2 Emotion tables and scores ----
    df_joy, num_of_joy_sentences, joy_scores = _label_table(
        df_emotion, 'joy', 'Joy Sentences',
        '-------No joy sentences found in report-------')
    df_sadness, num_of_sad_sentences, sad_scores = _label_table(
        df_emotion, 'sadness', 'Sad Sentences',
        '-------No sad sentences found in report-------')
    df_anger, num_of_anger_sentences, _ = _label_table(
        df_emotion, 'anger', 'Angry Sentences',
        '-------No angry sentences found in report-------')
    df_surprise, num_of_surprise_sentences, _ = _label_table(
        df_emotion, 'surprise', 'Surprised Sentences',
        '-------No surprised sentences found in report-------')
    emotion_mean = _net_score(joy_scores, sad_scores)

    # ---- 3.3 Intent tables ----
    df_query, num_of_queries, _ = _label_table(
        df_intent, 'query', 'Queries',
        '-------No queries found in report-------', min_score=_MIN_INTENT_SCORE)
    df_complaint, num_of_complaints, _ = _label_table(
        df_intent, 'complaint', 'Complaints',
        '-------No complaints found in report-------', min_score=_MIN_INTENT_SCORE)
    df_suggestion, num_of_suggestions, _ = _label_table(
        df_intent, 'suggestion', 'Suggestions',
        '-------No suggestions found in report-------', min_score=_MIN_INTENT_SCORE)
    total_num_of_intent = num_of_queries + num_of_complaints + num_of_suggestions

    # ---- 4. Plotting ----
    fig = make_subplots(rows=62, cols=6, specs=_report_specs())

    # ---- 4.1 Sentiment ----
    fig.add_trace(
        _headline_indicator(int(mean * 100), "Sentiment Analysis", "Positivity Score", "%"),
        row=4, col=3)
    colors = px.colors.diverging.Portland
    fig.add_trace(
        go.Pie(labels=labels, values=values, hole=0.5,
               title='Count by label',
               marker=dict(colors=colors, line=dict(width=2, color='white'))),
        row=6, col=1)
    fig.add_trace(
        go.Indicator(mode="number", value=len(df), title={"text": "Count of Sentence"}),
        row=6, col=3)
    fig.add_trace(
        go.Indicator(
            mode="gauge+number",
            value=mean,
            domain={'x': [0, 1], 'y': [0, 1]},
            title={'text': "Average of Score", 'font': {'size': 16}},
            gauge={
                'axis': {'range': [-1, 1], 'tickwidth': 1, 'tickcolor': "darkblue"},
                'bar': {'color': "darkblue"},
                'steps': [
                    {'range': [-0.29, 0.29], 'color': 'white'},
                    {'range': [0.3, 1], 'color': 'green'},
                    {'range': [-1, -0.3], 'color': 'red'},
                ],
                'threshold': {
                    'line': {'color': "black", 'width': 4},
                    'thickness': 0.75,
                    'value': abs(pos_df_mean - neg_df_mean),
                },
            },
        ),
        row=6, col=5)
    if mean < -0.29:
        gauge_title = "Cummulative Sentiment Negative"
    elif mean < 0.29:
        gauge_title = "Cummulative Sentiment Neutral"
    else:
        gauge_title = "Cummulative Sentiment Positive"
    fig.update_traces(title_text=gauge_title, selector=dict(type='indicator'), row=6, col=5)

    fig.add_trace(go.Image(z=_word_cloud_image(text)), row=13, col=1)
    fig.update_xaxes(visible=False, row=13, col=1)
    fig.update_yaxes(visible=False, row=13, col=1)
    fig.add_trace(_table_trace(pos_df), row=13, col=4)
    fig.add_trace(_table_trace(neg_df), row=18, col=4)
    fig.add_trace(_table_trace(neu_df), row=18, col=1)

    # ---- 4.2 Emotion ----
    fig.add_trace(
        _headline_indicator(int(emotion_mean * 100), "Emotion Analysis", "Happiness Score", "%"),
        row=26, col=3)
    fig.add_trace(
        go.Bar(x=['Joy', 'Sadness', 'Anger', 'Surprise'],
               y=[num_of_joy_sentences, num_of_sad_sentences,
                  num_of_anger_sentences, num_of_surprise_sentences],
               showlegend=True,
               marker_color=['#174ecf', '#cfc517', '#940625', '#17cfcb'],
               text=['😀', '😞', '😡', '😯'],
               textfont=dict(size=40)),
        row=29, col=1)
    fig.update_xaxes(title_text='Emotions', title_font=dict(size=16), row=29, col=1)
    fig.update_yaxes(title_text='Number of sentences', title_font=dict(size=16), row=29, col=1)
    fig.add_trace(_table_trace(df_joy), row=36, col=1)
    fig.add_trace(_table_trace(df_sadness), row=36, col=4)
    fig.add_trace(_table_trace(df_surprise), row=39, col=1)
    fig.add_trace(_table_trace(df_anger), row=39, col=4)

    # ---- 4.3 Intent ----
    # Divide by at least 1 so a report without complaints does not raise
    # ZeroDivisionError.
    suggestion_ratio = round(num_of_suggestions / max(num_of_complaints, 1), 2)
    fig.add_trace(
        _headline_indicator(suggestion_ratio, "Intent Analysis", "Suggestion/Complaint Ratio", ""),
        row=44, col=3)
    fig.add_trace(
        _bullet_indicator(num_of_queries, total_num_of_intent, "Queries", bar_color="blue"),
        row=47, col=2)
    fig.add_trace(
        _bullet_indicator(num_of_suggestions, total_num_of_intent, "Suggestions"),
        row=50, col=2)
    fig.add_trace(
        _bullet_indicator(num_of_complaints, total_num_of_intent, "Complaints", bar_color="red"),
        row=53, col=2)
    fig.add_trace(_table_trace(df_query), row=56, col=1)
    fig.add_trace(_table_trace(df_complaint), row=56, col=3)
    fig.add_trace(_table_trace(df_suggestion), row=56, col=5)

    # ---- Title: truncate, then wrap with HTML line breaks ----
    if len(title) > 120:
        title = title[:120] + '...'
    wrapped_title = "<br>".join(textwrap.wrap(title, width=50))
    fig.update_layout(
        height=4000,
        showlegend=False,
        title={'text': f"<b>{wrapped_title} - Text Analysis Report</b>",
               'x': 0.5, 'xanchor': 'center', 'font': {'size': 32}},
    )
    return fig


# ---------------------------------------------------------------------------
# Upload handling. Cheap checks run first so the PDF is parsed only when it
# is actually going to be analysed.
# ---------------------------------------------------------------------------
if not uploaded_file:
    st.session_state.filename_key = ''
elif len(uploaded_file) > 1:
    st.error("Please upload only one PDF file at a time.")
elif uploaded_file[0].type != 'application/pdf':
    st.error("Please upload a PDF file.")
elif st.session_state.filename_key == uploaded_file[0].name:
    # filename_key is set to the file name after a report has been built, so
    # a rerun with the same file lands here instead of re-analysing it.
    st.write("Report downloaded successfully")
else:
    pdf_file = uploaded_file[0]
    pdf_reader = PyPDF2.PdfReader(pdf_file)
    num_pages = len(pdf_reader.pages)
    if num_pages > _MAX_PDF_PAGES:
        st.error("Pages in PDF file should be less than 20.")
    else:
        ############################ 1. Extract text from PDF ############################
        st.write(f"Number of pages in PDF file: {num_pages}")
        text = _extract_pdf_text(pdf_reader)
        text = text.replace("\n", " ")
        text = text.replace("$", "dollar ")
        sentences = sent_tokenize(text)
        useful_sentence = _useful_sentences(sentences)
        if not useful_sentence:
            # Nothing to classify (e.g. no extractable text); stop here
            # instead of failing later on an empty sentence list.
            st.error("No sentences suitable for analysis could be extracted from this PDF.")
        else:
            ############################ 2. Running models ############################
            df, df_emotion, df_intent = _run_models(useful_sentence)
            ############################ 3./4. Processing and plotting ############################
            # The first tokenized sentence doubles as the report title.
            fig = _build_figure(df, df_emotion, df_intent, text, sentences[0])
            ############################## 5. Download Report ##############################
            buffer = io.StringIO()
            fig.write_html(buffer, include_plotlyjs='cdn')
            html_bytes = buffer.getvalue().encode()
            st.download_button(
                label='Download Report',
                data=html_bytes,
                file_name='report.html',
                mime='text/html'
            )
            st.session_state.filename_key = pdf_file.name