import streamlit as st
import os.path
import pathlib
import pandas as pd
import numpy as np
import PyPDF2
from PyPDF2 import PdfReader
from os import walk
import nltk
import glob
import plotly.express as px
from wordcloud import WordCloud
import plotly.io as pio
from plotly.subplots import make_subplots
import plotly.graph_objs as go
import plotly.offline as pyo
import io
from io import StringIO
#@st.cache_resource()
def get_nl():
    return nltk.download('punkt')
get_nl()
from nltk.tokenize import sent_tokenize
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from transformers import pipeline
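# Note: sent_tokenize below depends on the NLTK 'punkt' data fetched by get_nl() above.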
# if os.path.exists("report.html"):
#     os.remove("report.html")
#@st.cache_resource()
def get_sentiment_model():
    tokenizer = AutoTokenizer.from_pretrained("ProsusAI/finbert")
    model = AutoModelForSequenceClassification.from_pretrained("ProsusAI/finbert")
    return tokenizer, model
tokenizer_sentiment, model_sentiment = get_sentiment_model()

def get_emotion_model():
    tokenizer = AutoTokenizer.from_pretrained("j-hartmann/emotion-english-distilroberta-base")
    model = AutoModelForSequenceClassification.from_pretrained("j-hartmann/emotion-english-distilroberta-base")
    return tokenizer, model
tokenizer_emotion, model_emotion = get_emotion_model()

def get_intent_model():
    classifier = pipeline("zero-shot-classification", model='cross-encoder/nli-deberta-v3-small')
    return classifier
intent_classifier = get_intent_model()
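# Three models drive the report: FinBERT (ProsusAI/finbert) scores each sentence as
# positive/negative/neutral, the DistilRoBERTa emotion checkpoint labels emotions such as
# joy, sadness, anger and surprise, and the NLI cross-encoder performs zero-shot intent
# classification over the candidate labels complaint/suggestion/query used further down.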
def extract_text_from_pdf(path):
    text = ''
    reader = PdfReader(path)
    number_of_pages = len(reader.pages)
    print(number_of_pages)
    for i in range(number_of_pages):
        page = reader.pages[i]
        text = text + page.extract_text()
    return text
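# Illustrative usage (hypothetical path): extract_text_from_pdf("sample.pdf") returns the
# concatenated text of every page; the Streamlit flow below performs the same extraction inline.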
# Create a button to download the HTML file
def download_html():
    with st.spinner('Downloading HTML file...'):
        # Read the HTML content of the generated report
        with open('report.html', "r") as f:
            html = f.read()
        # Set the file name and content type
        file_name = "report.html"
        mime_type = "text/html"
        # Use st.download_button() to create a download button
        print('download button')
        st.download_button(label="Download Report", data=html, file_name=file_name, mime=mime_type)
        st.stop()
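# Note: download_html() is a standalone helper; the report section at the bottom builds its
# own in-memory download button and does not call this function.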
if 'filename_key' not in st.session_state:
    st.session_state.filename_key = ''

st.write("""
# Document Analysis Tool
""")
#uploaded_file = st.file_uploader("Choose a PDF file")
#uploaded_file = st.file_uploader("Choose a PDF file", accept_multiple_files=False, type=['pdf'])
uploaded_file = st.file_uploader("Choose a PDF file", accept_multiple_files=True, type=['pdf'])

#if uploaded_file is not None:
if len(uploaded_file) == 0:
    st.session_state.filename_key = ''
elif len(uploaded_file) > 0:
    import time
    # Wait for 5 seconds before reading the upload
    time.sleep(5)
    pdf_reader = PyPDF2.PdfReader(uploaded_file[0])
    num_pages = len(pdf_reader.pages)
    file_name = uploaded_file[0].name
    # st.write(st.session_state.filename_key)
    # print(file_name)
    # st.write("Filename:", file_name)
    if num_pages > 20:
        st.error("The PDF file should have no more than 20 pages.")
    # Check that only one file was uploaded
    #elif isinstance(uploaded_file, list):
    elif len(uploaded_file) > 1:
        st.error("Please upload only one PDF file at a time.")
    elif st.session_state.filename_key == file_name:
        st.write("Report downloaded successfully")
    else:
        #uploaded_file = uploaded_file[0]
        # Check that the file is a PDF
        if uploaded_file[0].type != 'application/pdf':
            st.error("Please upload a PDF file.")
        else:
            ############################ 1. Extract text from PDF ############################
            text = ''
            # Read the uploaded PDF and collect the text of every page
            pdf_reader = PyPDF2.PdfReader(uploaded_file[0])
            # Get the number of pages in the PDF file
            num_pages = len(pdf_reader.pages)
            # Display the number of pages in the PDF file
            st.write(f"Number of pages in PDF file: {num_pages}")
            for i in range(num_pages):
                page = pdf_reader.pages[i]
                text = text + page.extract_text()

            ############################ 2. Running models ############################
            text = text.replace("\n", " ")
            text = text.replace("$", "dollar ")
            sentences = sent_tokenize(text)
            title = sentences[0]
            long_sentence = []
            small_sentence = []
            useful_sentence = []
            for i in sentences:
                if len(i) > 510:
                    long_sentence.append(i)
                elif len(i) < 50:
                    small_sentence.append(i)
                else:
                    useful_sentence.append(i)
            useful_sentence_len = len(useful_sentence)
            del sentences
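            # The filter above keeps mid-length sentences only: anything over 510 characters is set
            # aside (presumably to stay within the transformers' 512-token input limit) and anything
            # under 50 characters is treated as too short to carry useful sentiment.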
            ############################ 2.1 Sentiment Modeling ############################
            placeholder1 = st.empty()
            placeholder1.text('Performing Sentiment Analysis...')
            #with st.empty():
            my_bar = st.progress(0)
            # Reuse the FinBERT weights loaded at startup instead of downloading them again
            classifier = pipeline("text-classification", model=model_sentiment, tokenizer=tokenizer_sentiment)
            #output = classifier(useful_sentence)
            output = []
            i = 0
            for temp in useful_sentence:
                output.extend(classifier(temp))
                i = i + 1
                my_bar.progress(int((i / useful_sentence_len) * 100))
            my_bar.empty()
            df = pd.DataFrame.from_dict(output)
            df['Sentence'] = pd.Series(useful_sentence)
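            # Each classifier(sentence) call returns a one-element list such as
            # [{'label': 'positive', 'score': 0.93}] (illustrative values), so `output` holds one
            # label/score dict per useful sentence and `df` gains label, score and Sentence columns.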
            ############################ 2.2 Emotion Modeling ############################
            #placeholder2 = st.empty()
            placeholder1.text('Performing Emotion Analysis...')
            # with st.empty():
            my_bar = st.progress(0)
            # Reuse the preloaded emotion checkpoint
            classifier = pipeline("text-classification", model=model_emotion, tokenizer=tokenizer_emotion, top_k=1)
            output_emotion = []
            i = 0
            for temp in useful_sentence:
                output_emotion.extend(classifier(temp)[0])
                i = i + 1
                my_bar.progress(int((i / useful_sentence_len) * 100))
            my_bar.empty()
            placeholder1.text('Emotion Analysis Completed')
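            # With top_k=1 each sentence keeps only its single most likely emotion; the model itself
            # distinguishes seven classes (anger, disgust, fear, joy, neutral, sadness, surprise),
            # of which joy, sadness, anger and surprise are charted below.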
            ############################ 2.3 Intent Modeling ############################
            placeholder1.text('Performing Intent Analysis...')
            my_bar = st.progress(0)
            candidate_labels = ['complaint', 'suggestion', 'query']
            classifier = intent_classifier
            # temp_intent = classifier(useful_sentence, candidate_labels)
            # output_intent = []
            # for temp in temp_intent:
            #     output_intent.append({'label': temp['labels'][0], 'score': temp['scores'][0]})
            output_intent = []
            i = 0
            for temp1 in useful_sentence:
                temp = classifier(temp1, candidate_labels)
                output_intent.append({'label': temp['labels'][0], 'score': temp['scores'][0]})
                i = i + 1
                my_bar.progress(int((i / useful_sentence_len) * 100))
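            # The zero-shot pipeline returns {'labels': [...], 'scores': [...]} sorted by score in
            # descending order, so index 0 above is the best-matching intent for each sentence.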
            df_intent = pd.DataFrame.from_dict(output_intent)
            df_intent['Sentence'] = pd.Series(useful_sentence)
            my_bar.empty()
            placeholder1.text('Processing Completed')
            ############################ 3. Processing ############################
            ############################ 3.1. Sentiment Analysis ############################
            # labels = ['neutral', 'positive', 'negative']
            # values = df.label.value_counts().to_list()
            labels = ['neutral', 'positive', 'negative']
            values = [df[df['label'] == 'neutral'].shape[0], df[df['label'] == 'positive'].shape[0], df[df['label'] == 'negative'].shape[0]]
            # Remove filler words before building the word cloud. Replacing "s" first strips every
            # letter 's' from the text, which is why mangled tokens such as 'quetion', 'wa', 'firt'
            # and 'cutomer' also appear in this list.
            words_to_remove = ["s", "quarter", "thank", "million", "Thank", "quetion", 'wa', 'rate', 'firt',
                               "customer", "business", "last year", "year", 'lat', 'well', 'jut', 'thi', 'cutomer',
                               "will", "think", "higher", "question", "going"]
            for word in words_to_remove:
                text = text.replace(word, "")
            wordcloud = WordCloud(background_color='white', width=800, height=400).generate(text)
            image = wordcloud.to_image()

            pos_df = df[df['label'] == 'positive']
            pos_df = pos_df[['score', 'Sentence']]
            pos_df = pos_df.sort_values('score', ascending=False)
            pos_df_mean = pos_df.score.mean()
            pos_df['score'] = pos_df['score'].round(4)
            pos_df.rename(columns={'Sentence': 'Positive Sentences'}, inplace=True)
            num_of_pos_sentences = pos_df.shape[0]
            if num_of_pos_sentences == 0:
                pos_df.loc[0] = [0.0, '-------No positive sentences found in report-------']

            neg_df = df[df['label'] == 'negative']
            neg_df = neg_df[['score', 'Sentence']]
            neg_df = neg_df.sort_values('score', ascending=False)
            neg_df_mean = neg_df.score.mean()
            neg_df['score'] = neg_df['score'].round(4)
            neg_df.rename(columns={'Sentence': 'Negative Sentences'}, inplace=True)
            num_of_neg_sentences = neg_df.shape[0]
            if num_of_neg_sentences == 0:
                neg_df.loc[0] = [0.0, '-------No negative sentences found in report-------']

            neu_df = df[df['label'] == 'neutral']
            neu_df = neu_df[['score', 'Sentence']]
            neu_df = neu_df.sort_values('score', ascending=False)
            #neu_df_mean = neu_df.score.mean()
            neu_df['score'] = neu_df['score'].round(4)
            neu_df.rename(columns={'Sentence': 'Neutral Sentences'}, inplace=True)
            num_of_neu_sentences = neu_df.shape[0]
            if num_of_neu_sentences == 0:
                neu_df.loc[0] = [0.0, '-------No neutral sentences found in report-------']

            # df_temp = neg_df
            # df_temp = df_temp['score'] * -1
            # df_temp = pd.concat([df_temp, pos_df])
            # Work on a copy so the negation does not also flip the scores shown in the negative table
            df_temp = neg_df.copy()
            df_temp['score'] = df_temp['score'] * -1
            df_temp_list = df_temp['score'].to_list() + pos_df['score'].to_list()
            mean = sum(df_temp_list) / len(df_temp_list)
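            # Document-level positivity: negative scores are negated, pooled with the positive scores
            # and averaged, giving a value in roughly [-1, 1] that feeds the indicator and gauge below.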
            ############################ 3.2. Emotion Analysis ############################
            df_emotion = pd.DataFrame.from_dict(output_emotion)
            df_emotion['Sentence'] = pd.Series(useful_sentence)

            df_joy = df_emotion[df_emotion['label'] == 'joy']
            df_joy = df_joy[['score', 'Sentence']]
            df_joy = df_joy.sort_values('score', ascending=False)
            df_joy['score'] = df_joy['score'].round(4)
            df_joy.rename(columns={'Sentence': 'Joy Sentences'}, inplace=True)
            num_of_joy_sentences = df_joy.shape[0]
            if num_of_joy_sentences == 0:
                df_joy.loc[0] = [0.0, '-------No joy sentences found in report-------']

            df_sadness = df_emotion[df_emotion['label'] == 'sadness']
            df_sadness = df_sadness[['score', 'Sentence']]
            df_sadness = df_sadness.sort_values('score', ascending=False)
            df_sadness['score'] = df_sadness['score'].round(4)
            df_sadness.rename(columns={'Sentence': 'Sad Sentences'}, inplace=True)
            num_of_sad_sentences = df_sadness.shape[0]
            if num_of_sad_sentences == 0:
                df_sadness.loc[0] = [0.0, '-------No sad sentences found in report-------']

            df_anger = df_emotion[df_emotion['label'] == 'anger']
            df_anger = df_anger[['score', 'Sentence']]
            df_anger = df_anger.sort_values('score', ascending=False)
            df_anger['score'] = df_anger['score'].round(4)
            df_anger.rename(columns={'Sentence': 'Angry Sentences'}, inplace=True)
            num_of_anger_sentences = df_anger.shape[0]
            if num_of_anger_sentences == 0:
                df_anger.loc[0] = [0.0, '-------No angry sentences found in report-------']

            df_surprise = df_emotion[df_emotion['label'] == 'surprise']
            df_surprise = df_surprise[['score', 'Sentence']]
            df_surprise = df_surprise.sort_values('score', ascending=False)
            df_surprise['score'] = df_surprise['score'].round(4)
            df_surprise.rename(columns={'Sentence': 'Surprised Sentences'}, inplace=True)
            num_of_surprise_sentences = df_surprise.shape[0]
            if num_of_surprise_sentences == 0:
                df_surprise.loc[0] = [0.0, '-------No surprised sentences found in report-------']

            # df_temp_emotion = df_sadness
            # df_temp_emotion = pd.concat([df_sadness, df_anger])
            # df_temp_emotion = df_temp_emotion['score'] * -1
            # df_temp_emotion = pd.concat([df_temp_emotion, df_joy])
            # Happiness score mirrors the positivity score: negated sadness scores pooled with joy scores
            df_temp_emotion = df_sadness.copy()
            df_temp_emotion['score'] = df_temp_emotion['score'] * -1
            df_temp_emotion_list = df_temp_emotion['score'].to_list() + df_joy['score'].to_list()
            emotion_mean = sum(df_temp_emotion_list) / len(df_temp_emotion_list)
            # df_temp = neg_df
            # df_temp['score'] = df_temp['score'] * -1
            # df_temp_list = df_temp['score'].to_list() + pos_df['score'].to_list()
            # mean = sum(df_temp_list) / len(df_temp_list)
            ############################ 3.3. Intent Analysis ############################
            df_query = df_intent[df_intent['label'] == 'query']
            df_query = df_query[['score', 'Sentence']]
            df_query = df_query.sort_values('score', ascending=False)
            df_query['score'] = df_query['score'].round(4)
            df_query.rename(columns={'Sentence': 'Queries'}, inplace=True)
            df_query = df_query[df_query['score'] > 0.5]
            num_of_queries = df_query.shape[0]
            if num_of_queries == 0:
                df_query.loc[0] = [0.0, '-------No queries found in report-------']

            df_complaint = df_intent[df_intent['label'] == 'complaint']
            df_complaint = df_complaint[['score', 'Sentence']]
            df_complaint = df_complaint.sort_values('score', ascending=False)
            df_complaint['score'] = df_complaint['score'].round(4)
            df_complaint.rename(columns={'Sentence': 'Complaints'}, inplace=True)
            df_complaint = df_complaint[df_complaint['score'] > 0.5]
            num_of_complaints = df_complaint.shape[0]
            if num_of_complaints == 0:
                df_complaint.loc[0] = [0.0, '-------No complaints found in report-------']

            df_suggestion = df_intent[df_intent['label'] == 'suggestion']
            df_suggestion = df_suggestion[['score', 'Sentence']]
            df_suggestion = df_suggestion.sort_values('score', ascending=False)
            df_suggestion['score'] = df_suggestion['score'].round(4)
            df_suggestion.rename(columns={'Sentence': 'Suggestions'}, inplace=True)
            df_suggestion = df_suggestion[df_suggestion['score'] > 0.5]
            num_of_suggestions = df_suggestion.shape[0]
            if num_of_suggestions == 0:
                df_suggestion.loc[0] = [0.0, '-------No suggestions found in report-------']

            total_num_of_intent = num_of_queries + num_of_complaints + num_of_suggestions
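            # Only sentences whose best intent scores above 0.5 are kept in each table;
            # total_num_of_intent sets the shared axis range of the three bullet gauges below.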
            ############################ 4. Plotting ############################
            fig = make_subplots(
                rows=62, cols=6,
                specs=[
                    [None, None, None, None, None, None],
                    [None, None, None, None, None, None],
                    [None, None, None, None, None, None],
                    [None, None, {"type": "indicator", "rowspan": 3, "colspan": 2}, None, None, None],
                    [None, None, None, None, None, None],
                    [{"type": "pie", "rowspan": 6, "colspan": 2}, None, {"type": "indicator", "rowspan": 6, "colspan": 2}, None, {"type": "indicator", "rowspan": 6, "colspan": 2}, None],
                    [None, None, None, None, None, None],
                    [None, None, None, None, None, None],
                    [None, None, None, None, None, None],
                    [None, None, None, None, None, None],
                    [None, None, None, None, None, None],
                    [None, None, None, None, None, None],
                    [{"type": "image", "rowspan": 5, "colspan": 3}, None, None, {"type": "table", "rowspan": 5, "colspan": 3}, None, None],
                    [None, None, None, None, None, None],
                    [None, None, None, None, None, None],
                    [None, None, None, None, None, None],
                    [None, None, None, None, None, None],
                    [{"type": "table", "rowspan": 5, "colspan": 3}, None, None, {"type": "table", "rowspan": 5, "colspan": 3}, None, None],
                    [None, None, None, None, None, None],
                    [None, None, None, None, None, None],
                    [None, None, None, None, None, None],
                    [None, None, None, None, None, None],
                    [None, None, None, None, None, None],
                    [None, None, None, None, None, None],
                    [None, None, None, None, None, None],
                    [None, None, {"type": "indicator", "rowspan": 3, "colspan": 2}, None, None, None],
                    [None, None, None, None, None, None],
                    [None, None, None, None, None, None],
                    [{"type": "bar", "rowspan": 6, "colspan": 6}, None, None, None, None, None],
                    [None, None, None, None, None, None],
                    [None, None, None, None, None, None],
                    [None, None, None, None, None, None],
                    [None, None, None, None, None, None],
                    [None, None, None, None, None, None],
                    [None, None, None, None, None, None],
                    [{"type": "table", "rowspan": 2, "colspan": 3}, None, None, {"type": "table", "rowspan": 2, "colspan": 3}, None, None],
                    [None, None, None, None, None, None],
                    [None, None, None, None, None, None],
                    [{"type": "table", "rowspan": 2, "colspan": 3}, None, None, {"type": "table", "rowspan": 2, "colspan": 3}, None, None],
                    [None, None, None, None, None, None],
                    [None, None, None, None, None, None],
                    [None, None, None, None, None, None],
                    [None, None, None, None, None, None],
                    [None, None, {"type": "indicator", "rowspan": 3, "colspan": 2}, None, None, None],
                    [None, None, None, None, None, None],
                    [None, None, None, None, None, None],
                    [None, {"type": "indicator", "rowspan": 2, "colspan": 5}, None, None, None, None],  # first bullet
                    [None, None, None, None, None, None],
                    [None, None, None, None, None, None],
                    [None, {"type": "indicator", "rowspan": 2, "colspan": 5}, None, None, None, None],  # 2nd bullet
                    [None, None, None, None, None, None],
                    [None, None, None, None, None, None],
                    [None, {"type": "indicator", "rowspan": 2, "colspan": 5}, None, None, None, None],
                    [None, None, None, None, None, None],
                    [None, None, None, None, None, None],
                    [{"type": "table", "rowspan": 4, "colspan": 2}, None, {"type": "table", "rowspan": 4, "colspan": 2}, None, {"type": "table", "rowspan": 4, "colspan": 2}, None],
                    [None, None, None, None, None, None],
                    [None, None, None, None, None, None],
                    [None, None, None, None, None, None],
                    [None, None, None, None, None, None],
                    [None, None, None, None, None, None],
                    [None, None, None, None, None, None],
                ],
            )
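            # The 62x6 grid stacks three report sections top to bottom: sentiment (headline
            # indicator, label pie, sentence count, gauge, word cloud and sentence tables),
            # emotion (headline indicator, bar chart and four emotion tables) and intent
            # (headline indicator, three bullet gauges and the query/complaint/suggestion tables).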
            ############################ 4.1. Sentiment Analysis ############################
            fig.add_trace(go.Indicator(
                mode="number",
                value=int(mean * 100),
                number={"suffix": "%"},
                title={"text": "<span style='font-size:1.5em'>Sentiment Analysis</span><br><span style='font-size:0.8em;color:gray'>Positivity Score</span>"}
            ), row=4, col=3)

            colors = px.colors.diverging.Portland  # RdBu
            fig.add_trace(go.Pie(labels=labels, values=values, hole=0.5,
                                 title='Count by label',
                                 marker=dict(colors=colors,
                                             line=dict(width=2, color='white'))),
                          row=6, col=1)

            fig.add_trace(go.Indicator(
                mode="number",
                value=len(df.label.values.tolist()),
                title={"text": "Count of Sentence"}), row=6, col=3)
            #fig.update_traces(title_text="Sentiment Analysis", selector=dict(type='indicator'), row=6, col=3)

            fig.add_trace(go.Indicator(
                mode="gauge+number",
                value=mean,
                domain={'x': [0, 1], 'y': [0, 1]},
                title={'text': "Average of Score", 'font': {'size': 16}},
                gauge={
                    'axis': {'range': [-1, 1], 'tickwidth': 1, 'tickcolor': "darkblue"},
                    'bar': {'color': "darkblue"},
                    'steps': [
                        {'range': [-0.29, 0.29], 'color': 'white'},
                        {'range': [0.3, 1], 'color': 'green'},
                        {'range': [-1, -0.3], 'color': 'red'}
                    ],
                    'threshold': {
                        'line': {'color': "black", 'width': 4},
                        'thickness': 0.75,
                        'value': abs(pos_df_mean - neg_df_mean)
                    }
                }
            ), row=6, col=5)
            if mean < -0.29:
                fig.update_traces(title_text="Cumulative Sentiment Negative", selector=dict(type='indicator'), row=6, col=5)
            elif mean < 0.29:
                fig.update_traces(title_text="Cumulative Sentiment Neutral", selector=dict(type='indicator'), row=6, col=5)
            else:
                fig.update_traces(title_text="Cumulative Sentiment Positive", selector=dict(type='indicator'), row=6, col=5)

            # Convert the PIL word-cloud image to an array for the go.Image trace
            fig.add_trace(go.Image(z=np.array(image)), row=13, col=1)
            fig.update_xaxes(visible=False, row=13, col=1)
            fig.update_yaxes(visible=False, row=13, col=1)

            table_trace1 = go.Table(
                header=dict(values=list(pos_df.columns), fill_color='lightgray', align='left'),
                cells=dict(values=[pos_df[name] for name in pos_df.columns], fill_color='white', align='left'),
                columnwidth=[1, 4]
            )
            fig.add_trace(table_trace1, row=13, col=4)

            table_trace2 = go.Table(
                header=dict(values=list(neg_df.columns), fill_color='lightgray', align='left'),
                cells=dict(values=[neg_df[name] for name in neg_df.columns], fill_color='white', align='left'),
                columnwidth=[1, 4]
            )
            fig.add_trace(table_trace2, row=18, col=4)

            table_trace2 = go.Table(
                header=dict(values=list(neu_df.columns), fill_color='lightgray', align='left'),
                cells=dict(values=[neu_df[name] for name in neu_df.columns], fill_color='white', align='left'),
                columnwidth=[1, 4]
            )
            fig.add_trace(table_trace2, row=18, col=1)
            ########################### 4.2. Emotion Analysis ###########################
            fig.add_trace(go.Indicator(
                mode="number",
                value=int(emotion_mean * 100),
                number={"suffix": "%"},
                title={"text": "<span style='font-size:1.5em'>Emotion Analysis</span><br><span style='font-size:0.8em;color:gray'>Happiness Score</span>"}
            ), row=26, col=3)

            # Add bar chart
            colors_emotions = ['#174ecf', '#cfc517', '#940625', '#17cfcb']
            emotion_bar_xlabels = ['Joy', 'Sadness', 'Anger', 'Surprise']
            emotion_bar_ylabels = [num_of_joy_sentences,
                                   num_of_sad_sentences,
                                   num_of_anger_sentences,
                                   num_of_surprise_sentences]
            #annotations = [dict(x=x, y=y, text='😀', showarrow=False) for x, y in zip(emotion_bar_xlabels, emotion_bar_ylabels)]
            # Emoji markers drawn on each bar
            annotations = ['😀', '😞', '😡', '😯']
            fig.add_trace(
                go.Bar(x=emotion_bar_xlabels, y=emotion_bar_ylabels,
                       showlegend=True,
                       marker_color=colors_emotions,
                       text=annotations,
                       textfont=dict(size=40)),
                row=29, col=1)
            fig.update_xaxes(title_text='Emotions', title_font=dict(size=16), row=29, col=1)
            fig.update_yaxes(title_text='Number of sentences', title_font=dict(size=16), row=29, col=1)
            # df_anger.loc[0] = [0.0, 'None']
            # df_anger

            ################## happiness table
            table_trace2 = go.Table(
                header=dict(values=list(df_joy.columns), fill_color='lightgray', align='left'),
                cells=dict(values=[df_joy[name] for name in df_joy.columns], fill_color='white', align='left'),
                columnwidth=[1, 4]
            )
            fig.add_trace(table_trace2, row=36, col=1)

            ################## sadness table
            table_trace2 = go.Table(
                header=dict(values=list(df_sadness.columns), fill_color='lightgray', align='left'),
                cells=dict(values=[df_sadness[name] for name in df_sadness.columns], fill_color='white', align='left'),
                columnwidth=[1, 4]
            )
            fig.add_trace(table_trace2, row=36, col=4)

            ################## surprise table
            table_trace2 = go.Table(
                header=dict(values=list(df_surprise.columns), fill_color='lightgray', align='left'),
                cells=dict(values=[df_surprise[name] for name in df_surprise.columns], fill_color='white', align='left'),
                columnwidth=[1, 4]
            )
            fig.add_trace(table_trace2, row=39, col=1)

            ################## anger table
            table_trace2 = go.Table(
                header=dict(values=list(df_anger.columns), fill_color='lightgray', align='left'),
                cells=dict(values=[df_anger[name] for name in df_anger.columns], fill_color='white', align='left'),
                columnwidth=[1, 4]
            )
            fig.add_trace(table_trace2, row=39, col=4)
            ########################### 4.3. Intent Analysis ###########################
            fig.add_trace(go.Indicator(
                mode="number",
                # max(..., 1) guards against a division by zero when no complaints were found
                value=round(num_of_suggestions / max(num_of_complaints, 1), 2),
                number={"suffix": ""},
                title={"text": "<span style='font-size:1.5em'>Intent Analysis</span><br><span style='font-size:0.8em;color:gray'>Suggestion/Complaint Ratio</span>"}
            ), row=44, col=3)

            fig.add_trace(go.Indicator(
                mode="number+gauge",
                gauge={'shape': "bullet", 'axis': {'range': [None, total_num_of_intent]}, 'bar': {'color': "blue"}},
                #delta = {'reference': 300},
                value=num_of_queries,
                #domain = {'x': [0.5, 1], 'y': [0.3, 0.9]},
                title={'text': "Queries"}), row=47, col=2)
            fig.add_trace(go.Indicator(
                mode="number+gauge",
                gauge={'shape': "bullet", 'axis': {'range': [None, total_num_of_intent]}},
                #delta = {'reference': 300},
                value=num_of_suggestions,
                #domain = {'x': [0.5, 1], 'y': [0.3, 0.9]},
                title={'text': "Suggestions"}), row=50, col=2)
            fig.add_trace(go.Indicator(
                mode="number+gauge",
                gauge={'shape': "bullet", 'axis': {'range': [None, total_num_of_intent]}, 'bar': {'color': "red"}},
                #delta = {'reference': 300},
                value=num_of_complaints,
                #domain = {'x': [0.5, 1], 'y': [0.3, 0.9]},
                title={'text': "Complaints"}), row=53, col=2)

            ############ query table
            table_trace2 = go.Table(
                header=dict(values=list(df_query.columns), fill_color='lightgray', align='left'),
                cells=dict(values=[df_query[name] for name in df_query.columns], fill_color='white', align='left'),
                columnwidth=[1, 4]
            )
            fig.add_trace(table_trace2, row=56, col=1)

            ############ complaints table
            table_trace2 = go.Table(
                header=dict(values=list(df_complaint.columns), fill_color='lightgray', align='left'),
                cells=dict(values=[df_complaint[name] for name in df_complaint.columns], fill_color='white', align='left'),
                columnwidth=[1, 4]
            )
            fig.add_trace(table_trace2, row=56, col=3)

            ############ suggestions table
            table_trace2 = go.Table(
                header=dict(values=list(df_suggestion.columns), fill_color='lightgray', align='left'),
                cells=dict(values=[df_suggestion[name] for name in df_suggestion.columns], fill_color='white', align='left'),
                columnwidth=[1, 4]
            )
            fig.add_trace(table_trace2, row=56, col=5)
            import textwrap
            # Truncate very long titles, then add HTML <br> tags to force line breaks in the heading
            if len(title) > 120:
                title = title[:120] + '...'
            wrapped_title = "<br>".join(textwrap.wrap(title, width=50))
            fig.update_layout(height=4000, showlegend=False, title={'text': f"<b>{wrapped_title} - Text Analysis Report</b>", 'x': 0.5, 'xanchor': 'center', 'font': {'size': 32}})
            #pyo.plot(fig, filename='report.html')
            ############################## 5. Download Report ##############################
            # Write the figure to an in-memory HTML buffer (include_plotlyjs='cdn' keeps the file
            # small) and serve the bytes straight to the browser via st.download_button
            buffer = io.StringIO()
            fig.write_html(buffer, include_plotlyjs='cdn')
            html_bytes = buffer.getvalue().encode()
            st.download_button(
                label='Download Report',
                data=html_bytes,
                file_name='report.html',
                mime='text/html'
            )
            # Remember the processed file so a rerun with the same upload skips reprocessing
            st.session_state.filename_key = file_name