achterbrain committed
Commit: 54b89ad · Parent: 982eb46

added from experimental: continued manual rating, table data preview, pre rating visualisation

pages/1_⚙️Manual assessment.py CHANGED

@@ -3,12 +3,52 @@ import numpy as np
 import pandas as pd
 from PIL import Image
 
+def add_previous_manual_assessments():
+    '''
+    This is a routine to allow the user to upload prior manual ratings and override
+    current ratings. This way the user can restart a manual assessment.
+    '''
+    # Create dict to translate uploaded score into str format used during manual assessment
+    Bool_str_dict = {True:'Yes',False:'No'}
+
+    st.subheader('Add previous assessments')
+    st.write('Upload results of previous assessment (as downloaded from summary page) to add these results and skip these images in your current manual assessment. Note that you can only add results for images which you have uploaded using the same file name.')
+
+    uploaded_ratings = st.file_uploader('Select .csv for upload', accept_multiple_files=False)
+    if uploaded_ratings != None:
+        try:
+            uploaded_ratings_df = pd.read_csv(uploaded_ratings)
+            overlapping_files_df =pd.merge(st.session_state['eval_df'],uploaded_ratings_df,on='File_name',how='inner')
+            st.write('Number of matching file names found: '+ str(len(overlapping_files_df)))
+            st.write('Click "Add results" button to add / override current ratings with uploaded ratings.')
+        except UnicodeDecodeError:
+            st.write('WARNING: The uploaded file has to be a .csv downloaded from the "Assessment summary" page.')
+
+
+    submitted = st.button("Add results")
+    if submitted:
+        try:
+            for row in uploaded_ratings_df.itertuples():
+                st.session_state['eval_df'].loc[
+                    st.session_state['eval_df']['File_name']==row.File_name,'manual_eval']=True
+                st.session_state['eval_df'].loc[
+                    st.session_state['eval_df']['File_name']==row.File_name,'manual_eval_completed']=True
+                st.session_state['eval_df'].loc[
+                    st.session_state['eval_df']['File_name']==row.File_name,'manual_eval_task_score']=Bool_str_dict[row.Score]
+
+            # Reset page after ratings were submitted
+            st.experimental_rerun()
+        except NameError:
+            st.write('You need to upload a .csv file before you can add results.')
+
+
 st.title('Manual assessment')
 st.write('On this page you can rate all uploaded images with regards to how good they match their respective prompts. You can see the outcome of your assessment on the summary page.')
 st.write(' ')
 side_image = Image.open('Graphics/IL_Logo.png')
 st.sidebar.image(side_image)
 # Create placeholders for key elements
+assessment_header = st.empty()
 assessment_progress = st.empty()
 
 # Extract how many images are available for manual assessment in entire uploaded dataset

@@ -30,12 +70,14 @@ except KeyError:
 ## If subprompt option is selected, it expands the form to include these as well
 ## If no images are available it prints situation specific instructions
 if manual_eval_available > 0:
+    assessment_header.subheader('Assess uploaded images')
     # Let user choose whether subprompts should be presented
     include_subprompts = st.checkbox('Show related subprompts if available (uploaded subprompts may not be shown if images have been assessed already).', value=True)
 
     # Update the progress statement
     assessment_progress.write('{0} images ready / left for assessment.'.format(manual_eval_available))
 
+
     # Extract first example for manual assessment which is not rated yet (first meaning the lowest index, for lowest prompt number)
     ## Also extract relevant metadata of this example
     curr_eval_df = st.session_state['eval_df']

@@ -129,6 +171,9 @@ if manual_eval_available > 0:
 
         # Reset page after ratings were submitted
         st.experimental_rerun()
+
+    add_previous_manual_assessments()
+
 # If no files are uploaded
 elif len(st.session_state['uploaded_img'])==0:
     assessment_progress.write('Upload files on dashboard starting page to start manual assessment.')

@@ -140,3 +185,6 @@ else:
 
     #st.session_state['eval_df'].loc[curr_manual_eval,'manual_eval_completed']=True
     #st.write(st.session_state['eval_df'])
+
+
+
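For context, the ratings file consumed by the new add_previous_manual_assessments() routine is the .csv produced by the summary page's download button, which writes the columns File_name, Prompt_no, Task and Score. A minimal sketch of such a file, with hypothetical file and task names (only File_name and the boolean Score are read back in; Score is translated to 'Yes'/'No' through Bool_str_dict):

import pandas as pd

# Hypothetical stand-in for a summary-page download; only File_name and
# Score are used when the file is re-uploaded on the manual assessment page.
ratings = pd.DataFrame({
    'File_name': ['prompt1_image1.png', 'prompt2_image1.png'],
    'Prompt_no': [1, 2],
    'Task': ['Counting', 'Conjunction'],
    'Score': [True, False],  # mapped to 'Yes'/'No' via Bool_str_dict on upload
})
ratings.to_csv('manual_assessment.csv', index=False)
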
pages/3_📊Assessment summary.py CHANGED

@@ -11,11 +11,21 @@ def convert_df_to_csv(df):
     # IMPORTANT: Cache the conversion to prevent computation on every rerun
     return df[['File_name','Prompt_no','Task','Score']].to_csv().encode('utf-8')
 
-def plot_style_simple(results_df):
+def plot_style_simple(results_df, return_table = False):
+
+
     eval_sum = results_df.groupby('Task')['Score'].sum()
     eval_count = results_df.groupby('Task')['Score'].count()
     eval_share = (eval_sum/eval_count)*100
 
+    if return_table:
+        return_series = results_df.groupby('Task')['Score'].sum()/results_df.groupby('Task')['Score'].count()*100
+        return_series = return_series.rename('Percentage correct')
+        return return_series
+
+    # Add small amount to make the bars on plot not disappear
+    eval_share = eval_share+1
+
     fig = plt.figure(figsize=(12, 3))
     sns.barplot(x=eval_share.index, y=eval_share.values, palette='GnBu')
     plt.xticks(rotation=-45)

@@ -23,9 +33,9 @@ def plot_style_simple(results_df):
     plt.xlabel(' ')
     return fig
 
-def plot_style_combined(results_df, uploaded_df = None):
+def plot_style_combined(results_df, uploaded_df = None, return_table=False):
     # Create joined dataframe of results and uploadd_df
-    uploaded_results_df = pd.read_csv(uploaded_df)
+    uploaded_results_df = uploaded_df
     manual_results_df['Model']='Current'
     uploaded_results_df['Model']='Uploaded'
     results_df = pd.concat([manual_results_df,uploaded_results_df])

@@ -36,6 +46,14 @@ def plot_style_combined(results_df, uploaded_df = None):
     eval_share = (eval_sum/eval_count)*100
     eval_share = eval_share.reset_index()
 
+    if return_table:
+        return_series = results_df.groupby(['Task','Model'])['Score'].sum()/results_df.groupby(['Task','Model'])['Score'].count()*100
+        return_series = return_series.rename('Percentage correct')
+        return return_series
+
+    # Add small amount to make the bars on plot not disappear
+    eval_share['Score'] = eval_share['Score']+1
+
     # Create plot
     fig = plt.figure(figsize=(12, 3))
     sns.barplot(data=eval_share,x='Task',y='Score',hue='Model', palette='GnBu')

@@ -44,20 +62,49 @@ def plot_style_combined(results_df, uploaded_df = None):
     plt.xlabel(' ')
     return fig
 
-assessment_result_frames = {}
-
-
-
-st.title('Assessment Summary')
-
-st.header('Manual assessment')
-
+
+def print_results_tabs(file_upload, results_df, file_upload_df=None):
+    # Create a tab for bar chart and one for table data
+    tab1, tab2 = st.tabs(["Bar chart", "Data table"])
+    with tab1:
+        # If df was uploaded for comparison, we create comparison plot, else simple plot
+        if file_upload == None:
+            fig = plot_style_simple(results_df)
+            st.pyplot(fig)
+        else:
+            fig = plot_style_combined(results_df,file_upload_df)
+            st.pyplot(fig)
+
+    with tab2:
+        # If df was uploaded for comparison, we create comparison table, else simple table
+        if file_upload == None:
+            table = plot_style_simple(results_df, return_table=True)
+            st.write(table)
+        else:
+            table = plot_style_combined(results_df,file_upload_df, return_table=True)
+            st.write(table)
+
+
+def pre_assessment_visualisation(type_str):
+    st.write('Complete {0} assessment or upload .csv with saved {0} assessment to generate summary.'.format(type_str))
+
+    # Display file uploader
+    file_upload = st.file_uploader("Upload .csv with saved {0} assessment to plot prior results.".format(type_str))
+    if file_upload != None:
+        file_upload_df = pd.read_csv(file_upload).copy()
+        print_results_tabs(file_upload=None, results_df=file_upload_df)
+
+assessment_result_frames = {}
+
+
+st.title('Assessment Summary')
+st.header('Manual assessment')
 
 try:
     if sum(st.session_state['eval_df']['manual_eval_completed'])>0:
         # Display file uploader
         manual_file_upload = st.file_uploader("Upload .csv with saved manual assessment for model comparison")
-
+
         # Create dataset for manual summary plots
         manual_eval_df = st.session_state['eval_df']
         manual_eval_df['Score'] = manual_eval_df['manual_eval_task_score'].map({'Yes':True, 'No':False})

@@ -67,13 +114,12 @@ try:
 
         assessment_result_frames['Manual assessment'] = manual_results_df
 
-        # If df was uploaded for comparison, we create comparison plot, else simple plot
-        if manual_file_upload == None:
-            fig = plot_style_simple(manual_results_df)
-            st.pyplot(fig)
-        else:
-            fig = plot_style_combined(manual_results_df,manual_file_upload)
-            st.pyplot(fig)
+        # Add plots / tables to page
+        try:
+            manual_file_upload_df = pd.read_csv(manual_file_upload).copy()
+            print_results_tabs(file_upload=manual_file_upload, results_df=manual_results_df, file_upload_df=manual_file_upload_df)
+        except ValueError:
+            print_results_tabs(file_upload=manual_file_upload, results_df=manual_results_df)
 
         st.download_button(
             label="Download manual assessment data",

@@ -82,9 +128,12 @@ try:
             mime='text/csv',
         )
     else:
-        st.write('Complete manual assessment to generate summary.')
+        pre_assessment_visualisation(type_str='manual')
 except KeyError:
-    st.write('Complete automated assessment to generate summary.')
+    pre_assessment_visualisation(type_str='manual')
+
+
+
 
 
 st.write(' ')

@@ -97,14 +146,12 @@ try:
     # Display file uploader
     auto_file_upload = st.file_uploader("Upload .csv with saved automated assessment for model comparison")
 
-
-    # If df was uploaded for comparison, we create comparison plot, else simple plot
-    if auto_file_upload == None:
-        fig = plot_style_simple(auto_eval_df)
-        st.pyplot(fig)
-    else:
-        fig = plot_style_combined(auto_eval_df,auto_file_upload)
-        st.pyplot(fig)
+    # Add plots / tables to page
+    try:
+        auto_file_upload_df = pd.read_csv(auto_file_upload).copy()
+        print_results_tabs(file_upload=auto_file_upload, results_df=auto_eval_df, file_upload_df=auto_file_upload_df)
+    except ValueError:
+        print_results_tabs(file_upload=auto_file_upload, results_df=auto_eval_df)
 
     st.download_button(
         label="Download automated assessment data",

@@ -113,7 +160,7 @@ try:
         mime='text/csv',
     )
 except KeyError:
-    st.write('Complete automated assessment to generate summary.')
+    pre_assessment_visualisation(type_str='automated')
 
 
 try:
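
Taken together, the summary-page changes route every result set through print_results_tabs(), which renders a bar chart and a data table in two tabs. A minimal standalone sketch of that pattern on toy data (the Task and Score columns mirror the dashboard's results frames; task names are hypothetical, and st.bar_chart stands in for the seaborn figure used on the page):

import pandas as pd
import streamlit as st

# Toy results frame with the columns the summary page groups on
results_df = pd.DataFrame({
    'Task': ['Counting', 'Counting', 'Conjunction'],
    'Score': [True, False, True],
})

# Percentage of correct ratings per task, as in plot_style_simple(return_table=True)
share = (results_df.groupby('Task')['Score'].sum()
         / results_df.groupby('Task')['Score'].count() * 100).rename('Percentage correct')

# One tab for the chart, one for the underlying numbers
tab1, tab2 = st.tabs(["Bar chart", "Data table"])
with tab1:
    st.bar_chart(share)  # stand-in for the seaborn barplot
with tab2:
    st.write(share)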