#!/usr/bin/env python3
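"""Benchmark translation quality of a model over a multilingual test corpus.

For each language pair in the corpus the inputs are translated (hypotheses are
cached on disk and reused on later runs), then scored with SacreBLEU and chrF;
scores are additionally averaged per pair of Joshi resource classes. Supports
local models, optionally distributed over several processes with HuggingFace
Accelerate, and the external Neurotõlge API as a special case.

Usage (script name illustrative):
    python benchmark.py <model-dir or "neurotolge"> <benchmark-corpus>
"""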
import sys
import os
import json
from collections import defaultdict

from data import split_by_lang, make_path_compatible, get_tr_pairs
from inference import coupled_translate, load_and_init_module_config, neurotolge_in_batches
from evaluate import load as load_metric
from legacy.langconv import get_mdl_type, get_joshi_class
from accelerate import Accelerator
from aux import log


def get_hyp_cache_dir(model_location, create=False):
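    """Return the hypothesis cache directory under the model directory, creating it if requested."""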
    hyp_location = os.path.join(model_location, "hyp_cache")
    if create:
        os.makedirs(hyp_location, exist_ok=True)
    return hyp_location


def get_hyp_cache_filename(model_location, benchmark_corpus, src_lang, tgt_lang):
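    """Return the hypothesis (.hyp) and source (.src) cache paths for one language pair."""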
    hyp_location = get_hyp_cache_dir(model_location)
    corpus_base = os.path.basename(benchmark_corpus)
    basename = f"{corpus_base}-{src_lang}-to-{tgt_lang}"
    hyp_file = os.path.join(hyp_location, f"{basename}.hyp")
    src_file = os.path.join(hyp_location, f"{basename}.src")
    return hyp_file, src_file


def get_benchmark_filename(model_location, benchmark_corpus):
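    """Return the path of the JSON file where the benchmark scores are saved."""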
    corpus_base = os.path.basename(benchmark_corpus)
    scores_file = f"{corpus_base}-scores.json"
    return os.path.join(model_location, scores_file)


def load_hyps_from_file(filename):
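    """Load cached hypotheses from a file, one per line."""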
    with open(filename, "r", encoding="utf-8") as f:
        return [line.strip() for line in f]


def save_hyps_to_file(hypos, filename):
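    """Write hypotheses to a file, one per line; a None value is silently skipped."""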
    if hypos is not None:
        with open(filename, "w", encoding="utf-8") as f:
            for hyp in hypos:
                f.write(hyp + "\n")


def load_or_translate(mod_config, input_output_list, lp, model_location, benchmark_corpus):
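    """Translate one language pair's inputs, reusing the on-disk cache when present.

    Returns a list of (input, hypothesis) pairs, or None if translation
    produced no output.
    """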
    src_lang, tgt_lang = lp.split("-")
    inputs, _ = zip(*input_output_list)
    cache_filename, src_filename = get_hyp_cache_filename(model_location, benchmark_corpus, src_lang, tgt_lang)
    try:
        hypos = load_hyps_from_file(cache_filename)
    except FileNotFoundError:
        # "models/neurotolge" is a sentinel for the external Neurotõlge API,
        # not a locally loaded model.
        if model_location == "models/neurotolge":
            hypos = neurotolge_in_batches(inputs, src_lang, tgt_lang)
        else:
            hypos = coupled_translate(mod_config, inputs, src_lang, tgt_lang)
        if hypos is not None:
            save_hyps_to_file(hypos, cache_filename)
            save_hyps_to_file(inputs, src_filename)
    # Guard against a failed translation: zip(inputs, None) would raise a
    # TypeError, and get_all_scores already handles a None entry per pair.
    return None if hypos is None else list(zip(inputs, hypos))


def translate_all_hyps(lp_test_set_dict, module_conf, model_id, corpus_id, accelerator=None):
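    """Translate (or load from cache) every language pair in the test set.

    With an Accelerator, language pairs are partitioned round-robin across
    processes and nothing is returned (results land in the cache); without
    one, a {lang_pair: (input, hypothesis) pairs} dict is returned.
    """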
    if accelerator is not None:
        # Sort the keys so that all processes agree on the pair assignment.
        key_list = sorted(lp_test_set_dict.keys())
        for idx, lp in enumerate(key_list):
            if idx % accelerator.num_processes == accelerator.process_index:
                log(f"Process {accelerator.process_index} translating {lp}")
                load_or_translate(module_conf, lp_test_set_dict[lp], lp, model_id, corpus_id)
        accelerator.wait_for_everyone()
        return None
    result = {}
    for i, lp in enumerate(lp_test_set_dict):
        log(f"Translating {lp}, {i + 1}/{len(lp_test_set_dict)}")
        result[lp] = load_or_translate(module_conf, lp_test_set_dict[lp], lp, model_id, corpus_id)
    return result


def get_joshi_lp(from_lang, to_lang):
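    """Map a language pair to the corresponding pair of Joshi resource classes."""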
    from_joshi = get_joshi_class(from_lang)
    to_joshi = get_joshi_class(to_lang)
    return f"{from_joshi}-{to_joshi}"


def get_all_scores(hyps_dict, lp_test_sets, metric_dict):
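    """Compute every metric for every language pair, plus per-Joshi-pair averages."""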
    scores = {}
    avgs = defaultdict(list)
    for lp in lp_test_sets:
        from_lang, to_lang = lp.split("-")
        jlp = get_joshi_lp(from_lang, to_lang)
        _, outputs = zip(*lp_test_sets[lp])
        # A None entry means translation failed for this pair; skip scoring it.
        preds = None if hyps_dict[lp] is None else [hyp for _, hyp in hyps_dict[lp]]
        if preds is None:
            continue
        for metric_name, metric_func in metric_dict.items():
            metric_value = metric_func.compute(predictions=preds, references=outputs)
            scores[f"{lp}-{metric_name}"] = metric_value["score"]
            avgs[f"{jlp}-{metric_name}"].append(metric_value["score"])
    # Average the per-pair scores within each Joshi-class pair.
    for avg_k in avgs:
        scores[avg_k] = sum(avgs[avg_k]) / len(avgs[avg_k])
    return scores


def save_scores(scores, mdl_id, corpus):
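    """Write the scores as pretty-printed JSON into the model directory."""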
    filename = get_benchmark_filename(mdl_id, corpus)
    with open(filename, "w", encoding="utf-8") as ofh:
        json.dump(scores, ofh, indent=2, sort_keys=True)


def benchmark_neurotolge(corpus):
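    """Benchmark the external Neurotõlge translation API on the given corpus."""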
log("Loading data") | |
lp_test_sets = split_by_lang(filename=corpus, model_type=None) | |
log("Starting benchmarking") | |
_ = get_hyp_cache_dir("models/neurotolge", create=True) | |
hyps_dict = translate_all_hyps(lp_test_sets, None, "models/neurotolge", corpus) | |
log("Loading metrics") | |
exp_id = "neurotõlge---" + make_path_compatible(corpus) | |
metric_dict = { | |
'bleu': load_metric("sacrebleu", experiment_id=exp_id), | |
'chrf': load_metric("chrf", experiment_id=exp_id) } | |
scores = get_all_scores(hyps_dict, lp_test_sets, metric_dict) | |
save_scores(scores, "models/neurotolge", corpus) | |


def benchmark_local_model(mdl_id, corpus):
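    """Benchmark a locally stored model, distributing language pairs via Accelerate."""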
    accelerator = Accelerator()
    main_model, module_config = load_and_init_module_config(mdl_id, accelerator)
    log("Loading data", accelerator=accelerator)
    lp_test_sets = split_by_lang(filename=corpus, model_type=get_mdl_type(main_model))
    log("Loading metrics", accelerator=accelerator)
    exp_id = make_path_compatible(mdl_id) + "---" + make_path_compatible(corpus)
    metric_dict = {
        "bleu": load_metric("sacrebleu", experiment_id=exp_id),
        "chrf": load_metric("chrf", experiment_id=exp_id),
    }
    log("Starting benchmarking", accelerator=accelerator)
    if accelerator.is_main_process:
        _ = get_hyp_cache_dir(mdl_id, create=True)
    # Ensure the cache directory exists before any process starts writing to it.
    accelerator.wait_for_everyone()
    # First pass: every process translates its share of language pairs into the
    # cache; second pass (main process only) re-reads everything from the cache
    # and computes the scores.
    translate_all_hyps(lp_test_sets, module_config, mdl_id, corpus, accelerator)
    if accelerator.is_main_process:
        fin_hyps_dict = translate_all_hyps(lp_test_sets, module_config, mdl_id, corpus)
        scores = get_all_scores(fin_hyps_dict, lp_test_sets, metric_dict)
        save_scores(scores, mdl_id, corpus)


if __name__ == "__main__":
    mdl_id_param = sys.argv[1]
    corpus_param = sys.argv[2]

    if mdl_id_param == "neurotolge":
        benchmark_neurotolge(corpus_param)
    else:
        benchmark_local_model(mdl_id_param, corpus_param)