Upload eval.py
Browse files
eval.py
CHANGED
@@ -9,19 +9,19 @@ from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
|
|
9 |
import datetime
|
10 |
import random
|
11 |
import re
|
12 |
-
from sacrebleu.metrics import CHRF
|
13 |
import time
|
14 |
import sys
|
15 |
# from keras import ops
|
16 |
#hyperparameters
|
17 |
MAX_SEQUENCE_LENGTH = 64
|
18 |
-
eval_samples =
|
19 |
-
|
20 |
|
21 |
transformer = keras.models.load_model('models_europarl/en_cs_translator_saved_20231209_0046.keras')
|
22 |
def read_files(path, lowercase = False):
|
23 |
with open(path, "r", encoding="utf-8") as f:
|
24 |
dataset_split = f.read().split("\n")[:-1]
|
|
|
25 |
if(lowercase):
|
26 |
dataset_split = [line.lower() for line in dataset_split]
|
27 |
return dataset_split
|
@@ -148,12 +148,9 @@ def decode_sequences(input_sentence):
|
|
148 |
|
149 |
test_en = read_files('datasets/europarl/test-cs-en.en')
|
150 |
test_cs = read_files('datasets/europarl/test-cs-en.cs')
|
151 |
-
bleu_metrics = keras_nlp.metrics.Bleu(
|
152 |
-
name="bleu",
|
153 |
-
tokenizer = cs_tokenizer
|
154 |
-
)
|
155 |
|
156 |
chrf = CHRF()
|
|
|
157 |
refs = test_cs[:eval_samples]
|
158 |
translations = []
|
159 |
start_time = time.time()
|
@@ -177,17 +174,17 @@ end_time = time.time()
|
|
177 |
|
178 |
|
179 |
|
|
|
180 |
|
181 |
refs_twodim = [[ref] for ref in refs]
|
182 |
-
bleu_metrics(refs_twodim, translations)
|
183 |
|
184 |
print("evaluating chrf", flush=True)
|
185 |
chrf2_result = chrf.corpus_score(translations, refs_twodim)
|
186 |
-
|
187 |
print("chrf2")
|
188 |
print(chrf2_result)
|
189 |
print("bleu")
|
190 |
-
print(
|
191 |
-
print("elapsed time")
|
192 |
elapsed_time = end_time - start_time
|
193 |
-
print(
|
|
|
|
9 |
import datetime
|
10 |
import random
|
11 |
import re
|
12 |
+
from sacrebleu.metrics import CHRF, BLEU
|
13 |
import time
|
14 |
import sys
|
15 |
# from keras import ops
|
16 |
#hyperparameters
|
17 |
MAX_SEQUENCE_LENGTH = 64
|
18 |
+
eval_samples = 10
|
|
|
19 |
|
20 |
transformer = keras.models.load_model('models_europarl/en_cs_translator_saved_20231209_0046.keras')
|
21 |
def read_files(path, lowercase = False):
|
22 |
with open(path, "r", encoding="utf-8") as f:
|
23 |
dataset_split = f.read().split("\n")[:-1]
|
24 |
+
# Optionally lowercase every line (only when the caller passes lowercase=True)
|
25 |
if(lowercase):
|
26 |
dataset_split = [line.lower() for line in dataset_split]
|
27 |
return dataset_split
|
|
|
148 |
|
149 |
test_en = read_files('datasets/europarl/test-cs-en.en')
|
150 |
test_cs = read_files('datasets/europarl/test-cs-en.cs')
|
|
|
|
|
|
|
|
|
151 |
|
152 |
chrf = CHRF()
|
153 |
+
bleu = BLEU()
|
154 |
refs = test_cs[:eval_samples]
|
155 |
translations = []
|
156 |
start_time = time.time()
|
|
|
174 |
|
175 |
|
176 |
|
177 |
+
print("evaluating bleu", flush=True)
|
178 |
|
179 |
# sacrebleu's corpus_score() takes a list of reference *streams*, each stream
# holding one reference per hypothesis (same length and order as the
# translations). There is a single reference per sentence here, so the whole
# refs list forms exactly one stream.
refs_twodim = [refs]
|
|
|
180 |
|
181 |
print("evaluating chrf", flush=True)
|
182 |
chrf2_result = chrf.corpus_score(translations, refs_twodim)
|
183 |
+
bleu_result = bleu.corpus_score(translations, refs_twodim)
|
184 |
print("chrf2")
|
185 |
print(chrf2_result)
|
186 |
print("bleu")
|
187 |
+
print(bleu_result)
|
|
|
188 |
elapsed_time = end_time - start_time
|
189 |
+
print("elapsed time")
|
190 |
+
print(elapsed_time)
|