Spaces:
Runtime error
Runtime error
Adding updated app.py
Browse filesSigned-off-by: Taejin Park <[email protected]>
- app.py +108 -124
- content.py +1 -8
- scorer.py +4 -4
app.py
CHANGED
@@ -1,12 +1,10 @@
|
|
1 |
import os
|
2 |
import json
|
3 |
-
import csv
|
4 |
import datetime
|
5 |
from email.utils import parseaddr
|
6 |
|
7 |
import gradio as gr
|
8 |
import pandas as pd
|
9 |
-
import numpy as np
|
10 |
|
11 |
from datasets import load_dataset
|
12 |
from apscheduler.schedulers.background import BackgroundScheduler
|
@@ -16,19 +14,15 @@ from scorer import instruction_scorer
|
|
16 |
from content import format_error, format_warning, format_log, TITLE, INTRODUCTION_TEXT, SUBMISSION_TEXT, CITATION_BUTTON_LABEL, CITATION_BUTTON_TEXT, model_hyperlink
|
17 |
|
18 |
TOKEN = os.environ.get("TOKEN", None)
|
19 |
-
OWNER="
|
20 |
-
|
21 |
-
|
22 |
-
SUBMISSION_DATASET = f"{OWNER}/submissions_internal"
|
23 |
-
CONTACT_DATASET = f"{OWNER}/contact_info"
|
24 |
-
RESULTS_DATASET = f"{OWNER}/results"
|
25 |
LEADERBOARD_PATH = f"{OWNER}/leaderboard"
|
|
|
26 |
api = HfApi()
|
27 |
|
28 |
YEAR_VERSION = "2024"
|
29 |
|
30 |
-
results = {"dev": {"cpWER": 0, "W
|
31 |
-
|
32 |
def read_json_file(filepath):
|
33 |
with open(filepath) as infile:
|
34 |
data_dict = json.load(infile)
|
@@ -40,50 +34,39 @@ def save_json_file(filepath, data_dict):
|
|
40 |
|
41 |
os.makedirs("scored", exist_ok=True)
|
42 |
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
# val_data_files = {"val": "contextual_val.csv"}
|
47 |
-
# val_dataset = load_dataset(VAL_DATASET, data_files=val_data_files , token=TOKEN, download_mode="force_redownload", ignore_verifications=True)
|
48 |
|
49 |
-
# results_data_files = {"test": "contextual_test_results.csv", "val": "contextual_val_results.csv"}
|
50 |
-
# results = load_dataset(RESULTS_DATASET, data_files=results_data_files, token=TOKEN, download_mode="force_redownload", ignore_verifications=True)
|
51 |
|
52 |
-
|
53 |
-
|
54 |
|
55 |
def get_dataframe_from_results(results, split):
|
56 |
df = results[split].to_pandas()
|
57 |
-
df.
|
58 |
-
df = df.sort_values(by=["All"], ascending=False)
|
59 |
return df
|
60 |
|
61 |
-
# test_dataset_dataframe = test_dataset["test"].to_pandas()
|
62 |
-
# val_dataset_dataframe = val_dataset["val"].to_pandas()
|
63 |
-
|
64 |
-
# contacts_dataframe = contact_infos["contacts"].to_pandas()
|
65 |
-
|
66 |
-
# val_results_dataframe = get_dataframe_from_results(results=results, split="val")
|
67 |
-
# test_results_dataframe = get_dataframe_from_results(results=results, split="test")
|
68 |
|
69 |
def restart_space():
|
70 |
api.restart_space(repo_id=LEADERBOARD_PATH, token=TOKEN)
|
71 |
|
72 |
-
TYPES = ["markdown", "markdown", "markdown", "number", "number", "number"
|
|
|
|
|
|
|
73 |
|
74 |
def add_new_eval(
|
75 |
-
|
76 |
method: str,
|
77 |
-
url: str,
|
78 |
path_to_file: str,
|
79 |
organisation: str,
|
80 |
mail: str,
|
81 |
):
|
82 |
-
print("printing all inputs:
|
83 |
|
84 |
-
if len(
|
85 |
-
print("
|
86 |
-
raise gr.Error("Please provide a
|
87 |
|
88 |
if len(method)==0:
|
89 |
print("method none")
|
@@ -99,121 +82,123 @@ def add_new_eval(
|
|
99 |
print("email here")
|
100 |
raise gr.Error("Please provide a valid email address.")
|
101 |
|
102 |
-
|
103 |
-
#
|
104 |
-
|
105 |
-
|
106 |
-
raise gr.Error("This model has been already submitted.")
|
107 |
|
108 |
if path_to_file is None:
|
109 |
print("file missing here")
|
110 |
raise gr.Error("Please attach a file.")
|
111 |
|
112 |
-
tmp_file_output = read_json_file(path_to_file.name)
|
113 |
-
|
114 |
-
if len(tmp_file_output.keys())!=1:
|
115 |
-
print("file format wrong here")
|
116 |
-
raise gr.Error("Submission file format incorrect. Please refer to the format description!")
|
117 |
-
|
118 |
-
tmp_output_key = list(tmp_file_output.keys())[0]
|
119 |
-
if len(tmp_file_output[tmp_output_key].keys())!=100:
|
120 |
-
print("file not 100 here")
|
121 |
-
raise gr.Error("File must contain exactly 100 predictions.")
|
122 |
|
123 |
# Save submitted file
|
124 |
time_atm = datetime.datetime.today()
|
125 |
api.upload_file(
|
126 |
repo_id=SUBMISSION_DATASET,
|
127 |
path_or_fileobj=path_to_file.name,
|
128 |
-
path_in_repo=f"{organisation}/{
|
129 |
repo_type="dataset",
|
130 |
token=TOKEN
|
131 |
)
|
132 |
|
133 |
# Compute score
|
134 |
-
|
135 |
-
|
136 |
-
|
137 |
-
|
138 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
139 |
|
140 |
# Save scored file
|
141 |
api.upload_file(
|
142 |
repo_id=SUBMISSION_DATASET,
|
143 |
path_or_fileobj=path_or_fileobj,
|
144 |
-
path_in_repo=f"{organisation}/{
|
145 |
repo_type="dataset",
|
146 |
token=TOKEN
|
147 |
)
|
148 |
|
149 |
# Actual submission
|
150 |
eval_entry = {
|
151 |
-
"
|
152 |
-
"
|
153 |
-
"
|
154 |
-
"
|
155 |
-
"
|
156 |
-
"
|
157 |
-
"Shopping":scores["shopping"],
|
158 |
-
"Navigation":scores["navigation-transportation"],
|
159 |
-
"Abstract":scores["abstract"],
|
160 |
-
"Application Usage":scores["app"],
|
161 |
-
"Web Usage":scores["web"],
|
162 |
-
"Infographic":scores["infographics"],
|
163 |
-
"Miscellaneous Natural Scenes": scores["misc"]
|
164 |
-
}
|
165 |
-
|
166 |
-
val_results_dataframe = get_dataframe_from_results(results=results, split="val")
|
167 |
-
val_results_dataframe = pd.concat([val_results_dataframe, pd.DataFrame([eval_entry])], ignore_index=True)
|
168 |
-
val_results_dataframe.to_csv('contextual_val_results.csv', index=False)
|
169 |
-
|
170 |
-
api.upload_file(
|
171 |
-
repo_id=RESULTS_DATASET,
|
172 |
-
path_or_fileobj="contextual_val_results.csv",
|
173 |
-
path_in_repo=f"contextual_val_results.csv",
|
174 |
-
repo_type="dataset",
|
175 |
-
token=TOKEN
|
176 |
-
)
|
177 |
-
|
178 |
-
contact_info = {
|
179 |
-
"Model": model,
|
180 |
-
"URL": url,
|
181 |
-
"Organisation": organisation,
|
182 |
-
"Mail": mail,
|
183 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
184 |
|
185 |
-
|
186 |
-
contacts_dataframe = pd.concat([contacts_dataframe, pd.DataFrame([contact_info])], ignore_index=True)
|
187 |
-
contacts_dataframe.to_csv('contacts.csv', index=False)
|
188 |
-
|
189 |
-
api.upload_file(
|
190 |
-
repo_id=CONTACT_DATASET,
|
191 |
-
path_or_fileobj="contacts.csv",
|
192 |
-
path_in_repo=f"contacts.csv",
|
193 |
-
repo_type="dataset",
|
194 |
-
token=TOKEN
|
195 |
-
)
|
196 |
-
|
197 |
-
return format_log(f"Model {model} submitted by {organisation} successfully! \nPlease refresh the val leaderboard, and wait a bit to see the score displayed")
|
198 |
|
199 |
|
200 |
def refresh():
|
201 |
-
results_data_files = {"
|
202 |
-
results = load_dataset(RESULTS_DATASET, data_files=
|
203 |
-
|
204 |
-
|
205 |
-
|
206 |
-
return val_results_dataframe, test_results_dataframe
|
207 |
|
208 |
def upload_file(files):
|
209 |
file_paths = [file.name for file in files]
|
210 |
return file_paths
|
211 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
212 |
|
213 |
demo = gr.Blocks()
|
214 |
with demo:
|
215 |
gr.HTML(TITLE)
|
216 |
-
|
217 |
|
218 |
with gr.Row():
|
219 |
with gr.Accordion("🧐 Introduction", open=False):
|
@@ -230,14 +215,15 @@ with demo:
|
|
230 |
label=CITATION_BUTTON_LABEL,
|
231 |
elem_id="citation-button",
|
232 |
)
|
233 |
-
with gr.Tab("Results:
|
234 |
-
|
235 |
-
value=
|
236 |
column_widths=["20%"]
|
237 |
)
|
238 |
-
|
239 |
-
|
240 |
-
|
|
|
241 |
column_widths=["20%"]
|
242 |
)
|
243 |
|
@@ -246,18 +232,17 @@ with demo:
|
|
246 |
refresh,
|
247 |
inputs=[],
|
248 |
outputs=[
|
249 |
-
|
250 |
-
|
251 |
],
|
252 |
)
|
253 |
-
with gr.Accordion("Submit a new
|
254 |
with gr.Row():
|
255 |
with gr.Column():
|
256 |
-
|
257 |
-
method_textbox = gr.Textbox(label="Method (
|
258 |
-
url_textbox = gr.Textbox(label="URL to model information", type='text')
|
259 |
with gr.Column():
|
260 |
-
organisation = gr.Textbox(label="Organisation", type='text')
|
261 |
mail = gr.Textbox(label="Contact email (will be stored privately, & used if there is an issue with your submission)", type='email')
|
262 |
file_output = gr.File()
|
263 |
|
@@ -267,9 +252,8 @@ with demo:
|
|
267 |
submit_button.click(
|
268 |
add_new_eval,
|
269 |
[
|
270 |
-
|
271 |
method_textbox,
|
272 |
-
url_textbox,
|
273 |
file_output,
|
274 |
organisation,
|
275 |
mail
|
|
|
1 |
import os
|
2 |
import json
|
|
|
3 |
import datetime
|
4 |
from email.utils import parseaddr
|
5 |
|
6 |
import gradio as gr
|
7 |
import pandas as pd
|
|
|
8 |
|
9 |
from datasets import load_dataset
|
10 |
from apscheduler.schedulers.background import BackgroundScheduler
|
|
|
14 |
from content import format_error, format_warning, format_log, TITLE, INTRODUCTION_TEXT, SUBMISSION_TEXT, CITATION_BUTTON_LABEL, CITATION_BUTTON_TEXT, model_hyperlink
|
15 |
|
16 |
TOKEN = os.environ.get("TOKEN", None)
|
17 |
+
OWNER="Taejin"
|
18 |
+
REF_JSONS_SET = f"{OWNER}/speaker_tagging_lb_refs"
|
19 |
+
RESULTS_DATASET = f"{OWNER}/spk_tag_results"
|
|
|
|
|
|
|
20 |
LEADERBOARD_PATH = f"{OWNER}/leaderboard"
|
21 |
+
SUBMISSION_DATASET = f"{OWNER}/submission_leaderboard"
|
22 |
api = HfApi()
|
23 |
|
24 |
YEAR_VERSION = "2024"
|
25 |
|
|
|
|
|
26 |
def read_json_file(filepath):
|
27 |
with open(filepath) as infile:
|
28 |
data_dict = json.load(infile)
|
|
|
34 |
|
35 |
os.makedirs("scored", exist_ok=True)
|
36 |
|
37 |
+
results_data_files = {"dev": "dev_set_data.csv", "eval": "eval_set_data.csv"}
|
38 |
+
results = load_dataset(RESULTS_DATASET, data_files=results_data_files, token=TOKEN, download_mode="force_redownload", ignore_verifications=True)
|
|
|
|
|
|
|
39 |
|
|
|
|
|
40 |
|
41 |
+
ref_json_files = {"dev_src": "err_dev.src.seglst.json", "dev_ref": "err_dev.ref.seglst.json", "eval_src": "err_eval.src.seglst.json", "eval_ref": "err_eval.ref.seglst.json"}
|
42 |
+
ref_jsons = load_dataset(REF_JSONS_SET, data_files=ref_json_files, token=TOKEN, download_mode="force_redownload", ignore_verifications=True)
|
43 |
|
44 |
def get_dataframe_from_results(results, split):
|
45 |
df = results[split].to_pandas()
|
46 |
+
df = df.sort_values(by=["cpWER"], ascending=True)
|
|
|
47 |
return df
|
48 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
49 |
|
50 |
def restart_space():
|
51 |
api.restart_space(repo_id=LEADERBOARD_PATH, token=TOKEN)
|
52 |
|
53 |
+
TYPES = ["markdown", "markdown", "markdown", "number", "number", "number"]
|
54 |
+
|
55 |
+
dev_dataset_dataframe= get_dataframe_from_results(results=results, split="dev")
|
56 |
+
eval_dataset_dataframe= get_dataframe_from_results(results=results, split="eval")
|
57 |
|
58 |
def add_new_eval(
|
59 |
+
system_name: str,
|
60 |
method: str,
|
|
|
61 |
path_to_file: str,
|
62 |
organisation: str,
|
63 |
mail: str,
|
64 |
):
|
65 |
+
print(f"printing all inputs system_name: {system_name}, method: {method}, path_to_file: {path_to_file}, organisation: {organisation}, mail: {mail}")
|
66 |
|
67 |
+
if len(system_name)==0:
|
68 |
+
print("system_name none")
|
69 |
+
raise gr.Error("Please provide a system_name name. Field empty!")
|
70 |
|
71 |
if len(method)==0:
|
72 |
print("method none")
|
|
|
82 |
print("email here")
|
83 |
raise gr.Error("Please provide a valid email address.")
|
84 |
|
85 |
+
# Check if the combination system_name/org already exists and prints a warning message if yes
|
86 |
+
# if system_name.lower() in set([m.lower() for m in results["dev"]["System_name"]]) and organisation.lower() in set([o.lower() for o in results["dev"]["Organisation"]]):
|
87 |
+
# print("system_name org combo here")
|
88 |
+
# raise gr.Error("This system_name has been already submitted.")
|
|
|
89 |
|
90 |
if path_to_file is None:
|
91 |
print("file missing here")
|
92 |
raise gr.Error("Please attach a file.")
|
93 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
94 |
|
95 |
# Save submitted file
|
96 |
time_atm = datetime.datetime.today()
|
97 |
api.upload_file(
|
98 |
repo_id=SUBMISSION_DATASET,
|
99 |
path_or_fileobj=path_to_file.name,
|
100 |
+
path_in_repo=f"{organisation}/{system_name}/{YEAR_VERSION}_raw_{time_atm}.json",
|
101 |
repo_type="dataset",
|
102 |
token=TOKEN
|
103 |
)
|
104 |
|
105 |
# Compute score
|
106 |
+
if "err_dev.hyp.seglst.json" in path_to_file.name:
|
107 |
+
ref_file_path="seglst_files/err_dev.ref.seglst.json"
|
108 |
+
mode = "dev"
|
109 |
+
elif "err_eval.hyp.seglst.json" in path_to_file.name:
|
110 |
+
ref_file_path="seglst_files/err_eval.ref.seglst.json"
|
111 |
+
mode = "eval"
|
112 |
+
else:
|
113 |
+
basename = os.path.basename(path_to_file.name)
|
114 |
+
raise gr.Error(f"{basename} is NOT a valid name. It should be either err_dev.hyp.seglst.json or err_eval.hyp.seglst.json")
|
115 |
+
scores = instruction_scorer(file_path_input= path_to_file.name, ref_file_path=ref_file_path, system_name=system_name)
|
116 |
+
|
117 |
+
path_or_fileobj=f"scored/{organisation}_{system_name}.json"
|
118 |
+
scores_and_info = {
|
119 |
+
"system_name": system_name,
|
120 |
+
"method":method,
|
121 |
+
"organisation": organisation,
|
122 |
+
"email": mail,
|
123 |
+
"cpWER": scores["cpWER"],
|
124 |
+
"errors": scores["errors"],
|
125 |
+
"length": scores["length"],
|
126 |
+
}
|
127 |
+
save_json_file(path_or_fileobj, data_dict=scores_and_info)
|
128 |
|
129 |
# Save scored file
|
130 |
api.upload_file(
|
131 |
repo_id=SUBMISSION_DATASET,
|
132 |
path_or_fileobj=path_or_fileobj,
|
133 |
+
path_in_repo=f"{organisation}/{system_name}/{YEAR_VERSION}_scored_{time_atm}.json",
|
134 |
repo_type="dataset",
|
135 |
token=TOKEN
|
136 |
)
|
137 |
|
138 |
# Actual submission
|
139 |
eval_entry = {
|
140 |
+
"system_name": system_name,
|
141 |
+
"method":method,
|
142 |
+
"organisation": organisation,
|
143 |
+
"cpWER":scores["cpWER"],
|
144 |
+
"errors":scores["errors"],
|
145 |
+
"length":scores["length"],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
146 |
}
|
147 |
+
|
148 |
+
if mode == "dev":
|
149 |
+
dev_set_data_csv = "dev_set_data.csv"
|
150 |
+
dev_dataset_dataframe = get_dataframe_from_results(results=results, split="dev")
|
151 |
+
dev_dataset_dataframe = pd.concat([dev_dataset_dataframe, pd.DataFrame([eval_entry])], ignore_index=True)
|
152 |
+
dev_dataset_dataframe.to_csv(dev_set_data_csv, index=False)
|
153 |
+
|
154 |
+
api.upload_file(
|
155 |
+
repo_id=RESULTS_DATASET,
|
156 |
+
path_or_fileobj=dev_set_data_csv,
|
157 |
+
path_in_repo=dev_set_data_csv,
|
158 |
+
repo_type="dataset",
|
159 |
+
token=TOKEN
|
160 |
+
)
|
161 |
+
elif mode == "eval":
|
162 |
+
eval_set_data_csv = "eval_set_data.csv"
|
163 |
+
eval_dataset_dataframe = get_dataframe_from_results(results=results, split="eval")
|
164 |
+
eval_dataset_dataframe = pd.concat([eval_dataset_dataframe, pd.DataFrame([eval_entry])], ignore_index=True)
|
165 |
+
eval_dataset_dataframe.to_csv(eval_set_data_csv, index=False)
|
166 |
+
|
167 |
+
api.upload_file(
|
168 |
+
repo_id=RESULTS_DATASET,
|
169 |
+
path_or_fileobj=eval_set_data_csv,
|
170 |
+
path_in_repo=eval_set_data_csv,
|
171 |
+
repo_type="dataset",
|
172 |
+
token=TOKEN
|
173 |
+
)
|
174 |
|
175 |
+
return format_log(f"system_name {system_name} submitted by {organisation} successfully! \nPlease refresh the val leaderboard, and wait a bit to see the score displayed")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
176 |
|
177 |
|
178 |
def refresh():
|
179 |
+
results_data_files = {"dev": "dev_set_data.csv", "eval": "eval_set_data.csv"}
|
180 |
+
results = load_dataset(RESULTS_DATASET, data_files=results_data_files, token=TOKEN, download_mode="force_redownload", ignore_verifications=True)
|
181 |
+
dev_results_dataframe = get_dataframe_from_results(results=results, split="dev")
|
182 |
+
eval_results_dataframe = get_dataframe_from_results(results=results, split="eval")
|
183 |
+
return dev_results_dataframe, eval_results_dataframe
|
|
|
184 |
|
185 |
def upload_file(files):
|
186 |
file_paths = [file.name for file in files]
|
187 |
return file_paths
|
188 |
|
189 |
+
for file_key in ['dev_src', 'dev_ref', 'eval_src', 'eval_ref']:
|
190 |
+
ref_jsons[file_key].to_json(path_or_buf=f"seglst_files/{file_key}.json")
|
191 |
+
buff_list = [x.strip() for x in open(f"seglst_files/{file_key}.json").readlines()]
|
192 |
+
buff_str = ",\n".join(buff_list)
|
193 |
+
seglst_json = f"[\n{buff_str}\n]"
|
194 |
+
split, datatype = file_key.split("_")
|
195 |
+
with open(f"seglst_files/err_{split}.{datatype}.seglst.json", "w") as f:
|
196 |
+
f.write(seglst_json)
|
197 |
|
198 |
demo = gr.Blocks()
|
199 |
with demo:
|
200 |
gr.HTML(TITLE)
|
201 |
+
gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
|
202 |
|
203 |
with gr.Row():
|
204 |
with gr.Accordion("🧐 Introduction", open=False):
|
|
|
215 |
label=CITATION_BUTTON_LABEL,
|
216 |
elem_id="citation-button",
|
217 |
)
|
218 |
+
with gr.Tab("Results: Dev"):
|
219 |
+
leaderboard_table_dev = gr.components.Dataframe(
|
220 |
+
value=dev_dataset_dataframe, datatype=TYPES, interactive=False,
|
221 |
column_widths=["20%"]
|
222 |
)
|
223 |
+
|
224 |
+
with gr.Tab("Results: Eval"):
|
225 |
+
leaderboard_table_eval = gr.components.Dataframe(
|
226 |
+
value=eval_dataset_dataframe, datatype=TYPES, interactive=False,
|
227 |
column_widths=["20%"]
|
228 |
)
|
229 |
|
|
|
232 |
refresh,
|
233 |
inputs=[],
|
234 |
outputs=[
|
235 |
+
leaderboard_table_dev,
|
236 |
+
leaderboard_table_eval,
|
237 |
],
|
238 |
)
|
239 |
+
with gr.Accordion("Submit a new system_name for evaluation"):
|
240 |
with gr.Row():
|
241 |
with gr.Column():
|
242 |
+
system_name_textbox = gr.Textbox(label="System name", type='text')
|
243 |
+
method_textbox = gr.Textbox(label="Method (LLM with prompt, beam-search, etc)", type='text')
|
|
|
244 |
with gr.Column():
|
245 |
+
organisation = gr.Textbox(label="Organisation or Team Name", type='text')
|
246 |
mail = gr.Textbox(label="Contact email (will be stored privately, & used if there is an issue with your submission)", type='email')
|
247 |
file_output = gr.File()
|
248 |
|
|
|
252 |
submit_button.click(
|
253 |
add_new_eval,
|
254 |
[
|
255 |
+
system_name_textbox,
|
256 |
method_textbox,
|
|
|
257 |
file_output,
|
258 |
organisation,
|
259 |
mail
|
content.py
CHANGED
@@ -76,14 +76,7 @@ There should be 506 predictions, corresponding to the 506 urls of the test set.
|
|
76 |
|
77 |
|
78 |
CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
|
79 |
-
CITATION_BUTTON_TEXT =
|
80 |
-
title={ConTextual: Evaluating Context-Sensitive Text-Rich Visual Reasoning in Large Multimodal Models},
|
81 |
-
author={Rohan Wadhawan and Hritik Bansal and Kai-Wei Chang and Nanyun Peng},
|
82 |
-
year={2024},
|
83 |
-
eprint={2401.13311},
|
84 |
-
archivePrefix={arXiv},
|
85 |
-
primaryClass={cs.CV}
|
86 |
-
}"""
|
87 |
|
88 |
|
89 |
def format_error(msg):
|
|
|
76 |
|
77 |
|
78 |
CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
|
79 |
+
CITATION_BUTTON_TEXT = ""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
80 |
|
81 |
|
82 |
def format_error(msg):
|
scorer.py
CHANGED
@@ -16,15 +16,15 @@ def instruction_scorer(file_path_input, ref_file_path, system_name):
|
|
16 |
subprocess.run(cmd_hyp)
|
17 |
|
18 |
# Read the JSON file and print the cpWER
|
19 |
-
|
20 |
-
output_cpwer_hyp_json_file =
|
|
|
21 |
with open(output_cpwer_hyp_json_file, "r") as temp_file:
|
22 |
data_h = json.load(temp_file)
|
23 |
print("Hypothesis cpWER:", data_h["error_rate"])
|
24 |
cpwer = data_h["error_rate"]
|
25 |
logging.info(f"-> HYPOTHESIS cpWER={cpwer:.4f}")
|
26 |
-
|
27 |
-
scores_dict = {"cpWER": cpwer, "WER": cpwer}
|
28 |
return scores_dict
|
29 |
|
30 |
|
|
|
16 |
subprocess.run(cmd_hyp)
|
17 |
|
18 |
# Read the JSON file and print the cpWER
|
19 |
+
print("file_path_input:", file_path_input)
|
20 |
+
output_cpwer_hyp_json_file = file_path_input.replace(".hyp.seglst.json", ".hyp.seglst_cpwer.json")
|
21 |
+
|
22 |
with open(output_cpwer_hyp_json_file, "r") as temp_file:
|
23 |
data_h = json.load(temp_file)
|
24 |
print("Hypothesis cpWER:", data_h["error_rate"])
|
25 |
cpwer = data_h["error_rate"]
|
26 |
logging.info(f"-> HYPOTHESIS cpWER={cpwer:.4f}")
|
27 |
+
scores_dict = {"cpWER": cpwer, "errors": data_h["errors"], "length": data_h["length"]}
|
|
|
28 |
return scores_dict
|
29 |
|
30 |
|