Taejin committed on
Commit d0951fd
1 Parent(s): ade7a2b

Adding updated app.py


Signed-off-by: Taejin Park <[email protected]>

Files changed (3)
  1. app.py +108 -124
  2. content.py +1 -8
  3. scorer.py +4 -4
app.py CHANGED
@@ -1,12 +1,10 @@
 import os
 import json
-import csv
 import datetime
 from email.utils import parseaddr
 
 import gradio as gr
 import pandas as pd
-import numpy as np
 
 from datasets import load_dataset
 from apscheduler.schedulers.background import BackgroundScheduler
@@ -16,19 +14,15 @@ from scorer import instruction_scorer
 from content import format_error, format_warning, format_log, TITLE, INTRODUCTION_TEXT, SUBMISSION_TEXT, CITATION_BUTTON_LABEL, CITATION_BUTTON_TEXT, model_hyperlink
 
 TOKEN = os.environ.get("TOKEN", None)
-OWNER="ucla-contextual"
-TEST_DATASET = f"{OWNER}/contextual_test"
-VAL_DATASET = f"{OWNER}/contextual_val"
-SUBMISSION_DATASET = f"{OWNER}/submissions_internal"
-CONTACT_DATASET = f"{OWNER}/contact_info"
-RESULTS_DATASET = f"{OWNER}/results"
+OWNER="Taejin"
+REF_JSONS_SET = f"{OWNER}/speaker_tagging_lb_refs"
+RESULTS_DATASET = f"{OWNER}/spk_tag_results"
 LEADERBOARD_PATH = f"{OWNER}/leaderboard"
+SUBMISSION_DATASET = f"{OWNER}/submission_leaderboard"
 api = HfApi()
 
 YEAR_VERSION = "2024"
 
-results = {"dev": {"cpWER": 0, "W
-
 def read_json_file(filepath):
     with open(filepath) as infile:
         data_dict = json.load(infile)
@@ -40,50 +34,39 @@ def save_json_file(filepath, data_dict):
 
 os.makedirs("scored", exist_ok=True)
 
-# test_data_files = {"test": "contextual_test.csv"}
-# test_dataset = load_dataset(TEST_DATASET, data_files=test_data_files , token=TOKEN, download_mode="force_redownload", ignore_verifications=True)
-
-# val_data_files = {"val": "contextual_val.csv"}
-# val_dataset = load_dataset(VAL_DATASET, data_files=val_data_files , token=TOKEN, download_mode="force_redownload", ignore_verifications=True)
+results_data_files = {"dev": "dev_set_data.csv", "eval": "eval_set_data.csv"}
+results = load_dataset(RESULTS_DATASET, data_files=results_data_files, token=TOKEN, download_mode="force_redownload", ignore_verifications=True)
 
-# results_data_files = {"test": "contextual_test_results.csv", "val": "contextual_val_results.csv"}
-# results = load_dataset(RESULTS_DATASET, data_files=results_data_files, token=TOKEN, download_mode="force_redownload", ignore_verifications=True)
 
-# contacts_data_files = {"contacts": "contacts.csv"}
-# contact_infos = load_dataset(CONTACT_DATASET, data_files=contacts_data_files, token=TOKEN, download_mode="force_redownload", ignore_verifications=True)
+ref_json_files = {"dev_src": "err_dev.src.seglst.json", "dev_ref": "err_dev.ref.seglst.json", "eval_src": "err_eval.src.seglst.json", "eval_ref": "err_eval.ref.seglst.json"}
+ref_jsons = load_dataset(REF_JSONS_SET, data_files=ref_json_files, token=TOKEN, download_mode="force_redownload", ignore_verifications=True)
 
 def get_dataframe_from_results(results, split):
     df = results[split].to_pandas()
-    df.drop(columns=['URL'], inplace=True)
-    df = df.sort_values(by=["All"], ascending=False)
+    df = df.sort_values(by=["cpWER"], ascending=True)
     return df
 
-# test_dataset_dataframe = test_dataset["test"].to_pandas()
-# val_dataset_dataframe = val_dataset["val"].to_pandas()
-
-# contacts_dataframe = contact_infos["contacts"].to_pandas()
-
-# val_results_dataframe = get_dataframe_from_results(results=results, split="val")
-# test_results_dataframe = get_dataframe_from_results(results=results, split="test")
 
 def restart_space():
     api.restart_space(repo_id=LEADERBOARD_PATH, token=TOKEN)
 
-TYPES = ["markdown", "markdown", "markdown", "number", "number", "number","number", "number", "number", "number", "number", "number"]
+TYPES = ["markdown", "markdown", "markdown", "number", "number", "number"]
+
+dev_dataset_dataframe= get_dataframe_from_results(results=results, split="dev")
+eval_dataset_dataframe= get_dataframe_from_results(results=results, split="eval")
 
 def add_new_eval(
-    model: str,
+    system_name: str,
     method: str,
-    url: str,
     path_to_file: str,
     organisation: str,
     mail: str,
 ):
-    print("printing all inputs:", model, method, url, path_to_file, organisation, mail)
+    print(f"printing all inputs system_name: {system_name}, method: {method}, path_to_file: {path_to_file}, organisation: {organisation}, mail: {mail}")
 
-    if len(model)==0:
-        print("model none")
-        raise gr.Error("Please provide a model name. Field empty!")
+    if len(system_name)==0:
+        print("system_name none")
+        raise gr.Error("Please provide a system_name name. Field empty!")
 
     if len(method)==0:
         print("method none")
@@ -99,121 +82,123 @@ def add_new_eval(
         print("email here")
         raise gr.Error("Please provide a valid email address.")
 
-
-    # Check if the combination model/org already exists and prints a warning message if yes
-    if model.lower() in set([m.lower() for m in results["val"]["Model"]]) and organisation.lower() in set([o.lower() for o in results["val"]["Organisation"]]):
-        print("model org combo here")
-        raise gr.Error("This model has been already submitted.")
+    # Check if the combination system_name/org already exists and prints a warning message if yes
+    # if system_name.lower() in set([m.lower() for m in results["dev"]["System_name"]]) and organisation.lower() in set([o.lower() for o in results["dev"]["Organisation"]]):
+    #     print("system_name org combo here")
+    #     raise gr.Error("This system_name has been already submitted.")
 
     if path_to_file is None:
        print("file missing here")
        raise gr.Error("Please attach a file.")
 
-    tmp_file_output = read_json_file(path_to_file.name)
-
-    if len(tmp_file_output.keys())!=1:
-        print("file format wrong here")
-        raise gr.Error("Submission file format incorrect. Please refer to the format description!")
-
-    tmp_output_key = list(tmp_file_output.keys())[0]
-    if len(tmp_file_output[tmp_output_key].keys())!=100:
-        print("file not 100 here")
-        raise gr.Error("File must contain exactly 100 predictions.")
 
     # Save submitted file
     time_atm = datetime.datetime.today()
     api.upload_file(
        repo_id=SUBMISSION_DATASET,
        path_or_fileobj=path_to_file.name,
-       path_in_repo=f"{organisation}/{model}/{YEAR_VERSION}_raw_{time_atm}.json",
+       path_in_repo=f"{organisation}/{system_name}/{YEAR_VERSION}_raw_{time_atm}.json",
        repo_type="dataset",
        token=TOKEN
    )
 
    # Compute score
-   file_path = path_to_file.name
-   scores = instruction_scorer(val_dataset_dataframe, file_path , model)
-
-   path_or_fileobj=f"scored/{organisation}_{model}.json"
-   save_json_file(path_or_fileobj, scores)
+   if "err_dev.hyp.seglst.json" in path_to_file.name:
+       ref_file_path="seglst_files/err_dev.ref.seglst.json"
+       mode = "dev"
+   elif "err_eval.hyp.seglst.json" in path_to_file.name:
+       ref_file_path="seglst_files/err_eval.ref.seglst.json"
+       mode = "eval"
+   else:
+       basename = os.path.basename(path_to_file.name)
+       raise gr.Error(f"{basename} is NOT a valid name. It should be either err_dev.hyp.seglst.json or err_eval.hyp.seglst.json")
+   scores = instruction_scorer(file_path_input= path_to_file.name, ref_file_path=ref_file_path, system_name=system_name)
+
+   path_or_fileobj=f"scored/{organisation}_{system_name}.json"
+   scores_and_info = {
+       "system_name": system_name,
+       "method":method,
+       "organisation": organisation,
+       "email": mail,
+       "cpWER": scores["cpWER"],
+       "errors": scores["errors"],
+       "length": scores["length"],
+   }
+   save_json_file(path_or_fileobj, data_dict=scores_and_info)
 
    # Save scored file
    api.upload_file(
        repo_id=SUBMISSION_DATASET,
        path_or_fileobj=path_or_fileobj,
-       path_in_repo=f"{organisation}/{model}/{YEAR_VERSION}_scored_{time_atm}.json",
+       path_in_repo=f"{organisation}/{system_name}/{YEAR_VERSION}_scored_{time_atm}.json",
        repo_type="dataset",
        token=TOKEN
    )
 
    # Actual submission
    eval_entry = {
-       "Model": model,
-       "Method":method,
-       "Organisation": organisation,
-       "URL": url,
-       "All":scores["average"],
-       "Time":scores["time"],
-       "Shopping":scores["shopping"],
-       "Navigation":scores["navigation-transportation"],
-       "Abstract":scores["abstract"],
-       "Application Usage":scores["app"],
-       "Web Usage":scores["web"],
-       "Infographic":scores["infographics"],
-       "Miscellaneous Natural Scenes": scores["misc"]
-   }
-
-   val_results_dataframe = get_dataframe_from_results(results=results, split="val")
-   val_results_dataframe = pd.concat([val_results_dataframe, pd.DataFrame([eval_entry])], ignore_index=True)
-   val_results_dataframe.to_csv('contextual_val_results.csv', index=False)
-
-   api.upload_file(
-       repo_id=RESULTS_DATASET,
-       path_or_fileobj="contextual_val_results.csv",
-       path_in_repo=f"contextual_val_results.csv",
-       repo_type="dataset",
-       token=TOKEN
-   )
-
-   contact_info = {
-       "Model": model,
-       "URL": url,
-       "Organisation": organisation,
-       "Mail": mail,
+       "system_name": system_name,
+       "method":method,
+       "organisation": organisation,
+       "cpWER":scores["cpWER"],
+       "errors":scores["errors"],
+       "length":scores["length"],
    }
+
+   if mode == "dev":
+       dev_set_data_csv = "dev_set_data.csv"
+       dev_dataset_dataframe = get_dataframe_from_results(results=results, split="dev")
+       dev_dataset_dataframe = pd.concat([dev_dataset_dataframe, pd.DataFrame([eval_entry])], ignore_index=True)
+       dev_dataset_dataframe.to_csv(dev_set_data_csv, index=False)
+
+       api.upload_file(
+           repo_id=RESULTS_DATASET,
+           path_or_fileobj=dev_set_data_csv,
+           path_in_repo=dev_set_data_csv,
+           repo_type="dataset",
+           token=TOKEN
+       )
+   elif mode == "eval":
+       eval_set_data_csv = "eval_set_data.csv"
+       eval_dataset_dataframe = get_dataframe_from_results(results=results, split="eval")
+       eval_dataset_dataframe = pd.concat([eval_dataset_dataframe, pd.DataFrame([eval_entry])], ignore_index=True)
+       eval_dataset_dataframe.to_csv(eval_set_data_csv, index=False)
+
+       api.upload_file(
+           repo_id=RESULTS_DATASET,
+           path_or_fileobj=eval_set_data_csv,
+           path_in_repo=eval_set_data_csv,
+           repo_type="dataset",
+           token=TOKEN
+       )
 
-   contacts_dataframe = contact_infos["contacts"].to_pandas()
-   contacts_dataframe = pd.concat([contacts_dataframe, pd.DataFrame([contact_info])], ignore_index=True)
-   contacts_dataframe.to_csv('contacts.csv', index=False)
-
-   api.upload_file(
-       repo_id=CONTACT_DATASET,
-       path_or_fileobj="contacts.csv",
-       path_in_repo=f"contacts.csv",
-       repo_type="dataset",
-       token=TOKEN
-   )
-
-   return format_log(f"Model {model} submitted by {organisation} successfully! \nPlease refresh the val leaderboard, and wait a bit to see the score displayed")
+   return format_log(f"system_name {system_name} submitted by {organisation} successfully! \nPlease refresh the val leaderboard, and wait a bit to see the score displayed")
 
 
 def refresh():
-    results_data_files = {"test": "contextual_test_results.csv", "val": "contextual_val_results.csv"}
-    results = load_dataset(RESULTS_DATASET, data_files=
-    results_data_files, token=TOKEN, download_mode="force_redownload", ignore_verifications=True)
-    val_results_dataframe = get_dataframe_from_results(results=results, split="val")
-    test_results_dataframe = get_dataframe_from_results(results=results, split="test")
-    return val_results_dataframe, test_results_dataframe
+    results_data_files = {"dev": "dev_set_data.csv", "eval": "eval_set_data.csv"}
+    results = load_dataset(RESULTS_DATASET, data_files=results_data_files, token=TOKEN, download_mode="force_redownload", ignore_verifications=True)
+    dev_results_dataframe = get_dataframe_from_results(results=results, split="dev")
+    eval_results_dataframe = get_dataframe_from_results(results=results, split="eval")
    return dev_results_dataframe, eval_results_dataframe
 
 
 def upload_file(files):
     file_paths = [file.name for file in files]
     return file_paths
 
+for file_key in ['dev_src', 'dev_ref', 'eval_src', 'eval_ref']:
+    ref_jsons[file_key].to_json(path_or_buf=f"seglst_files/{file_key}.json")
+    buff_list = [x.strip() for x in open(f"seglst_files/{file_key}.json").readlines()]
+    buff_str = ",\n".join(buff_list)
+    seglst_json = f"[\n{buff_str}\n]"
+    split, datatype = file_key.split("_")
+    with open(f"seglst_files/err_{split}.{datatype}.seglst.json", "w") as f:
+        f.write(seglst_json)
 
 demo = gr.Blocks()
 with demo:
     gr.HTML(TITLE)
-    # gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
+    gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
 
     with gr.Row():
         with gr.Accordion("🧐 Introduction", open=False):
@@ -230,14 +215,15 @@ with demo:
                 label=CITATION_BUTTON_LABEL,
                 elem_id="citation-button",
             )
-    with gr.Tab("Results: Test"):
-        leaderboard_table_test = gr.components.Dataframe(
-            value=test_results_dataframe, datatype=TYPES, interactive=False,
+    with gr.Tab("Results: Dev"):
+        leaderboard_table_dev = gr.components.Dataframe(
+            value=dev_dataset_dataframe, datatype=TYPES, interactive=False,
             column_widths=["20%"]
        )
-    with gr.Tab("Results: Val"):
-        leaderboard_table_val = gr.components.Dataframe(
-            value=val_results_dataframe, datatype=TYPES, interactive=False,
+
+    with gr.Tab("Results: Eval"):
+        leaderboard_table_eval = gr.components.Dataframe(
+            value=eval_dataset_dataframe, datatype=TYPES, interactive=False,
            column_widths=["20%"]
        )
 
@@ -246,18 +232,17 @@ with demo:
        refresh,
        inputs=[],
        outputs=[
-           leaderboard_table_val,
-           leaderboard_table_test,
+           leaderboard_table_dev,
+           leaderboard_table_eval,
        ],
    )
-    with gr.Accordion("Submit a new model for evaluation"):
+    with gr.Accordion("Submit a new system_name for evaluation"):
        with gr.Row():
            with gr.Column():
-               model_name_textbox = gr.Textbox(label="Model name", type='text')
-               method_textbox = gr.Textbox(label="Method (LMM or Aug LLM or any other)", type='text')
-               url_textbox = gr.Textbox(label="URL to model information", type='text')
+               system_name_textbox = gr.Textbox(label="System name", type='text')
+               method_textbox = gr.Textbox(label="Method (LLM with prompt, beam-search, etc)", type='text')
            with gr.Column():
-               organisation = gr.Textbox(label="Organisation", type='text')
+               organisation = gr.Textbox(label="Organisation or Team Name", type='text')
               mail = gr.Textbox(label="Contact email (will be stored privately, & used if there is an issue with your submission)", type='email')
               file_output = gr.File()
 
@@ -267,9 +252,8 @@ with demo:
    submit_button.click(
        add_new_eval,
        [
-           model_name_textbox,
+           system_name_textbox,
           method_textbox,
-           url_textbox,
           file_output,
           organisation,
           mail
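
For context on the new app.py flow: a submission is routed to the dev or eval split purely by its file name, scored with instruction_scorer, and written out as a small JSON record. Below is a minimal standalone sketch of that routing and of the scored record's shape; the helper name route_submission and all example values are illustrative, not part of the repository (app.py does this inline in add_new_eval and raises gr.Error rather than ValueError).

import json
import os


def route_submission(path):
    # Sketch of the app.py routing: map an uploaded hypothesis file to its
    # reference file and leaderboard split based on the file name alone.
    if "err_dev.hyp.seglst.json" in path:
        return "seglst_files/err_dev.ref.seglst.json", "dev"
    if "err_eval.hyp.seglst.json" in path:
        return "seglst_files/err_eval.ref.seglst.json", "eval"
    raise ValueError(f"{os.path.basename(path)} is not a valid name; expected err_dev.hyp.seglst.json or err_eval.hyp.seglst.json")


ref_path, mode = route_submission("/tmp/err_dev.hyp.seglst.json")
print(ref_path, mode)  # seglst_files/err_dev.ref.seglst.json dev

# Shape of the record app.py saves to scored/{organisation}_{system_name}.json;
# the values here are placeholders, not real results.
scores_and_info = {
    "system_name": "example_system",
    "method": "LLM with prompt",
    "organisation": "example_team",
    "email": "user@example.com",
    "cpWER": 0.123,
    "errors": 45,
    "length": 367,
}
print(json.dumps(scores_and_info, indent=2))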
content.py CHANGED
@@ -76,14 +76,7 @@ There should be 506 predictions, corresponding to the 506 urls of the test set.
 
 
 CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
-CITATION_BUTTON_TEXT = r"""@misc{wadhawan2024contextual,
-            title={ConTextual: Evaluating Context-Sensitive Text-Rich Visual Reasoning in Large Multimodal Models},
-            author={Rohan Wadhawan and Hritik Bansal and Kai-Wei Chang and Nanyun Peng},
-            year={2024},
-            eprint={2401.13311},
-            archivePrefix={arXiv},
-            primaryClass={cs.CV}
-        }"""
+CITATION_BUTTON_TEXT = ""
 
 
 def format_error(msg):
scorer.py CHANGED
@@ -16,15 +16,15 @@ def instruction_scorer(file_path_input, ref_file_path, system_name):
     subprocess.run(cmd_hyp)
 
     # Read the JSON file and print the cpWER
-    asrdiar_file_name="err_dev"
-    output_cpwer_hyp_json_file = os.path.join(f"{asrdiar_file_name}.hyp.seglst_cpwer.json")
+    print("file_path_input:", file_path_input)
+    output_cpwer_hyp_json_file = file_path_input.replace(".hyp.seglst.json", ".hyp.seglst_cpwer.json")
+
     with open(output_cpwer_hyp_json_file, "r") as temp_file:
         data_h = json.load(temp_file)
     print("Hypothesis cpWER:", data_h["error_rate"])
     cpwer = data_h["error_rate"]
     logging.info(f"-> HYPOTHESIS cpWER={cpwer:.4f}")
-
-    scores_dict = {"cpWER": cpwer, "WER": cpwer}
+    scores_dict = {"cpWER": cpwer, "errors": data_h["errors"], "length": data_h["length"]}
     return scores_dict
 
 
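
For context on the scorer.py change: the cpWER result path is now derived from the hypothesis file path instead of the previously hard-coded "err_dev" prefix, and the returned dictionary carries the raw error and length counts alongside cpWER. A minimal standalone sketch follows; the helper names are illustrative, and it assumes the *_cpwer.json file produced by the scoring step contains "error_rate", "errors", and "length" fields, which is what scorer.py reads back.

import json


def cpwer_output_path(hyp_path):
    # Mirrors the updated scorer.py: derive the cpWER result path from the
    # hypothesis path rather than a hard-coded "err_dev" prefix.
    return hyp_path.replace(".hyp.seglst.json", ".hyp.seglst_cpwer.json")


print(cpwer_output_path("err_eval.hyp.seglst.json"))  # err_eval.hyp.seglst_cpwer.json


def load_scores(result_json_path):
    # Assumes the cpWER JSON contains "error_rate", "errors", and "length".
    with open(result_json_path, "r") as f:
        data_h = json.load(f)
    return {"cpWER": data_h["error_rate"], "errors": data_h["errors"], "length": data_h["length"]}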