Commit 54b89ad
Parent(s): 982eb46

added from experimental: continued manual rating, table data preview, pre rating visualisation

Files changed:
- pages/1_⚙️Manual assessment.py  (+48, −0)
- pages/3_📊Assessment summary.py  (+72, −25)
pages/1_⚙️Manual assessment.py
CHANGED
@@ -3,12 +3,52 @@ import numpy as np
 import pandas as pd
 from PIL import Image
 
+def add_previous_manual_assessments():
+    '''
+    This is a routine to allow the user to upload prior manual ratings and override
+    current ratings. This way the user can restart a manual assessment.
+    '''
+    # Create dict to translate uploaded score into str format used during manual assessment
+    Bool_str_dict = {True:'Yes',False:'No'}
+
+    st.subheader('Add previous assessments')
+    st.write('Upload results of previous assessment (as downloaded from summary page) to add these results and skip these images in your current manual assessment. Note that you can only add results for images which you have uploaded using the same file name.')
+
+    uploaded_ratings = st.file_uploader('Select .csv for upload', accept_multiple_files=False)
+    if uploaded_ratings != None:
+        try:
+            uploaded_ratings_df = pd.read_csv(uploaded_ratings)
+            overlapping_files_df = pd.merge(st.session_state['eval_df'],uploaded_ratings_df,on='File_name',how='inner')
+            st.write('Number of matching file names found: '+ str(len(overlapping_files_df)))
+            st.write('Click "Add results" button to add / override current ratings with uploaded ratings.')
+        except UnicodeDecodeError:
+            st.write('WARNING: The uploaded file has to be a .csv downloaded from the "Assessment summary" page.')
+
+
+    submitted = st.button("Add results")
+    if submitted:
+        try:
+            for row in uploaded_ratings_df.itertuples():
+                st.session_state['eval_df'].loc[
+                    st.session_state['eval_df']['File_name']==row.File_name,'manual_eval']=True
+                st.session_state['eval_df'].loc[
+                    st.session_state['eval_df']['File_name']==row.File_name,'manual_eval_completed']=True
+                st.session_state['eval_df'].loc[
+                    st.session_state['eval_df']['File_name']==row.File_name,'manual_eval_task_score']=Bool_str_dict[row.Score]
+
+            # Reset page after ratings were submitted
+            st.experimental_rerun()
+        except NameError:
+            st.write('You need to upload a .csv file before you can add results.')
+
+
 st.title('Manual assessment')
 st.write('On this page you can rate all uploaded images with regards to how good they match their respective prompts. You can see the outcome of your assessment on the summary page.')
 st.write(' ')
 side_image = Image.open('Graphics/IL_Logo.png')
 st.sidebar.image(side_image)
 # Create placeholders for key elements
+assessment_header = st.empty()
 assessment_progress = st.empty()
 
 # Extract how many images are available for manual assessment in entire uploaded dataset
@@ -30,12 +70,14 @@ except KeyError:
 ## If subprompt option is selected, it expands the form to include these as well
 ## If no images are available it prints situation specific instructions
 if manual_eval_available > 0:
+    assessment_header.subheader('Assess uploaded images')
     # Let user choose whether subprompts should be presented
     include_subprompts = st.checkbox('Show related subprompts if available (uploaded subprompts may not be shown if images have been assessed already).', value=True)
 
     # Update the progress statement
     assessment_progress.write('{0} images ready / left for assessment.'.format(manual_eval_available))
 
+
     # Extract first example for manual assessment which is not rated yet (first meaning the lowest index, for lowest prompt number)
     ## Also extract relevant metadata of this example
     curr_eval_df = st.session_state['eval_df']
@@ -129,6 +171,9 @@ if manual_eval_available > 0:
 
         # Reset page after ratings were submitted
         st.experimental_rerun()
+
+    add_previous_manual_assessments()
+
 # If no files are uploaded
 elif len(st.session_state['uploaded_img'])==0:
     assessment_progress.write('Upload files on dashboard starting page to start manual assessment.')
@@ -140,3 +185,6 @@ else:
 
 #st.session_state['eval_df'].loc[curr_manual_eval,'manual_eval_completed']=True
 #st.write(st.session_state['eval_df'])
+
+
+
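For orientation, the core of the new add_previous_manual_assessments() helper is a per-row override of the session-state eval frame keyed on File_name, translating the uploaded boolean Score back into the 'Yes'/'No' strings the manual form writes. Below is a minimal sketch of that logic outside Streamlit; the column names and the Bool_str_dict mapping come from the diff above, while the toy file names and scores are purely illustrative.

import pandas as pd

# Toy stand-ins for st.session_state['eval_df'] and the uploaded ratings .csv
eval_df = pd.DataFrame({
    'File_name': ['img_01.png', 'img_02.png', 'img_03.png'],
    'manual_eval': [False, False, False],
    'manual_eval_completed': [False, False, False],
    'manual_eval_task_score': [None, None, None],
})
uploaded_ratings_df = pd.DataFrame({
    'File_name': ['img_01.png', 'img_03.png'],   # img_02.png has no prior rating
    'Score': [True, False],
})

# Same translation as in the helper: boolean score -> 'Yes'/'No' string
Bool_str_dict = {True: 'Yes', False: 'No'}

for row in uploaded_ratings_df.itertuples():
    match = eval_df['File_name'] == row.File_name
    eval_df.loc[match, 'manual_eval'] = True
    eval_df.loc[match, 'manual_eval_completed'] = True
    eval_df.loc[match, 'manual_eval_task_score'] = Bool_str_dict[row.Score]

print(eval_df)
# img_01.png and img_03.png are now marked completed with scores 'Yes' / 'No';
# img_02.png is untouched and stays in the manual assessment queue.

Matching on File_name rather than on row position is what the page's note about "images which you have uploaded using the same file name" relies on, and the NameError branch in the diff covers the case where "Add results" is clicked before any .csv has been parsed.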
pages/3_📊Assessment summary.py
CHANGED
@@ -11,11 +11,21 @@ def convert_df_to_csv(df):
     # IMPORTANT: Cache the conversion to prevent computation on every rerun
     return df[['File_name','Prompt_no','Task','Score']].to_csv().encode('utf-8')
 
-def plot_style_simple(results_df):
+def plot_style_simple(results_df, return_table = False):
+
+
     eval_sum = results_df.groupby('Task')['Score'].sum()
     eval_count = results_df.groupby('Task')['Score'].count()
     eval_share = (eval_sum/eval_count)*100
 
+    if return_table:
+        return_series = results_df.groupby('Task')['Score'].sum()/results_df.groupby('Task')['Score'].count()*100
+        return_series = return_series.rename('Percentage correct')
+        return return_series
+
+    # Add small amount to make the bars on plot not disappear
+    eval_share = eval_share+1
+
     fig = plt.figure(figsize=(12, 3))
     sns.barplot(x=eval_share.index, y=eval_share.values, palette='GnBu')
     plt.xticks(rotation=-45)
@@ -23,9 +33,9 @@ def plot_style_simple(results_df):
     plt.xlabel(' ')
     return fig
 
-def plot_style_combined(results_df, uploaded_df = None):
+def plot_style_combined(results_df, uploaded_df = None, return_table=False):
     # Create joined dataframe of results and uploadd_df
-    uploaded_results_df =
+    uploaded_results_df = uploaded_df
     manual_results_df['Model']='Current'
     uploaded_results_df['Model']='Uploaded'
     results_df = pd.concat([manual_results_df,uploaded_results_df])
@@ -36,6 +46,14 @@ def plot_style_combined(results_df, uploaded_df = None):
     eval_share = (eval_sum/eval_count)*100
     eval_share = eval_share.reset_index()
 
+    if return_table:
+        return_series = results_df.groupby(['Task','Model'])['Score'].sum()/results_df.groupby(['Task','Model'])['Score'].count()*100
+        return_series = return_series.rename('Percentage correct')
+        return return_series
+
+    # Add small amount to make the bars on plot not disappear
+    eval_share['Score'] = eval_share['Score']+1
+
     # Create plot
     fig = plt.figure(figsize=(12, 3))
     sns.barplot(data=eval_share,x='Task',y='Score',hue='Model', palette='GnBu')
@@ -44,20 +62,49 @@ def plot_style_combined(results_df, uploaded_df = None):
     plt.xlabel(' ')
     return fig
 
-assessment_result_frames = {}
 
 
-st.title('Assessment Summary')
 
-
 
+def print_results_tabs(file_upload, results_df, file_upload_df=None):
+    # Create a tab for bar chart and one for table data
+    tab1, tab2 = st.tabs(["Bar chart", "Data table"])
+    with tab1:
+        # If df was uploaded for comparison, we create comparison plot, else simple plot
+        if file_upload == None:
+            fig = plot_style_simple(results_df)
+            st.pyplot(fig)
+        else:
+            fig = plot_style_combined(results_df,file_upload_df)
+            st.pyplot(fig)
+
+    with tab2:
+        # If df was uploaded for comparison, we create comparison table, else simple table
+        if file_upload == None:
+            table = plot_style_simple(results_df, return_table=True)
+            st.write(table)
+        else:
+            table = plot_style_combined(results_df,file_upload_df, return_table=True)
+            st.write(table)
+
+
+def pre_assessment_visualisation(type_str):
+    st.write('Complete {0} assessment or upload .csv with saved {0} assessment to generate summary.'.format(type_str))
+
+    # Display file uploader
+    file_upload = st.file_uploader("Upload .csv with saved {0} assessment to plot prior results.".format(type_str))
+    if file_upload != None:
+        file_upload_df = pd.read_csv(file_upload).copy()
+        print_results_tabs(file_upload=None, results_df=file_upload_df)
+
+assessment_result_frames = {}
+
+
+st.title('Assessment Summary')
+st.header('Manual assessment')
 
 try:
     if sum(st.session_state['eval_df']['manual_eval_completed'])>0:
         # Display file uploader
         manual_file_upload = st.file_uploader("Upload .csv with saved manual assessment for model comparison")
 
         # Create dataset for manual summary plots
         manual_eval_df = st.session_state['eval_df']
         manual_eval_df['Score'] = manual_eval_df['manual_eval_task_score'].map({'Yes':True, 'No':False})
@@ -67,13 +114,12 @@ try:
 
         assessment_result_frames['Manual assessment'] = manual_results_df
 
-        #
-
-
-
-
-
-        st.pyplot(fig)
+        # Add plots / tables to page
+        try:
+            manual_file_upload_df = pd.read_csv(manual_file_upload).copy()
+            print_results_tabs(file_upload=manual_file_upload, results_df=manual_results_df, file_upload_df=manual_file_upload_df)
+        except ValueError:
+            print_results_tabs(file_upload=manual_file_upload, results_df=manual_results_df)
 
         st.download_button(
             label="Download manual assessment data",
@@ -82,9 +128,12 @@ try:
             mime='text/csv',
         )
     else:
-
+        pre_assessment_visualisation(type_str='manual')
 except KeyError:
-
+    pre_assessment_visualisation(type_str='manual')
+
+
+
 
 
 st.write(' ')
@@ -97,14 +146,12 @@ try:
         # Display file uploader
         auto_file_upload = st.file_uploader("Upload .csv with saved automated assessment for model comparison")
 
-
-
-
-
-
-
-        fig = plot_style_combined(auto_eval_df,auto_file_upload)
-        st.pyplot(fig)
+        # Add plots / tables to page
+        try:
+            auto_file_upload_df = pd.read_csv(auto_file_upload).copy()
+            print_results_tabs(file_upload=auto_file_upload, results_df=auto_eval_df, file_upload_df=auto_file_upload_df)
+        except ValueError:
+            print_results_tabs(file_upload=auto_file_upload, results_df=auto_eval_df)
 
         st.download_button(
             label="Download automated assessment data",
@@ -113,7 +160,7 @@ try:
             mime='text/csv',
         )
 except KeyError:
-
+    pre_assessment_visualisation(type_str='automated')
 
 
 try:
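For orientation, the return_table=True path added to plot_style_simple() reduces to a per-task share of correct answers, and plot_style_combined() computes the same share grouped by ['Task','Model'] so current and uploaded results sit side by side in the new "Data table" tab. Below is a minimal sketch of the simple case with toy tasks and scores; only the Task/Score column names and the 'Percentage correct' label come from the diff, the data is illustrative.

import pandas as pd

# Toy results frame in the shape the summary page builds from eval_df
results_df = pd.DataFrame({
    'Task': ['Counting', 'Counting', 'Colors', 'Colors', 'Colors'],
    'Score': [True, False, True, True, False],
})

# Per-task percentage of correct answers, as returned when return_table=True
return_series = (results_df.groupby('Task')['Score'].sum()
                 / results_df.groupby('Task')['Score'].count() * 100)
return_series = return_series.rename('Percentage correct')

print(return_series)
# Task
# Colors      66.666667
# Counting    50.000000
# Name: Percentage correct, dtype: float64

Note that the +1 nudge applied to eval_share in the diff only affects the bar chart (so tasks with 0% still render a visible bar); the table path returns before the nudge and reports the unshifted percentages.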