jkot committed on
Commit
389ef93
·
1 Parent(s): 32e3407

Upload eval.py

Browse files
Files changed (1) hide show
  1. eval.py +9 -12
eval.py CHANGED
@@ -9,19 +9,19 @@ from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
9
  import datetime
10
  import random
11
  import re
12
- from sacrebleu.metrics import CHRF
13
  import time
14
  import sys
15
  # from keras import ops
16
  #hyperparameters
17
  MAX_SEQUENCE_LENGTH = 64
18
- eval_samples = 100
19
-
20
 
21
  transformer = keras.models.load_model('models_europarl/en_cs_translator_saved_20231209_0046.keras')
22
  def read_files(path, lowercase = False):
23
  with open(path, "r", encoding="utf-8") as f:
24
  dataset_split = f.read().split("\n")[:-1]
 
25
  if(lowercase):
26
  dataset_split = [line.lower() for line in dataset_split]
27
  return dataset_split
@@ -148,12 +148,9 @@ def decode_sequences(input_sentence):
148
 
149
  test_en = read_files('datasets/europarl/test-cs-en.en')
150
  test_cs = read_files('datasets/europarl/test-cs-en.cs')
151
- bleu_metrics = keras_nlp.metrics.Bleu(
152
- name="bleu",
153
- tokenizer = cs_tokenizer
154
- )
155
 
156
  chrf = CHRF()
 
157
  refs = test_cs[:eval_samples]
158
  translations = []
159
  start_time = time.time()
@@ -177,17 +174,17 @@ end_time = time.time()
177
 
178
 
179
 
 
180
 
181
  refs_twodim = [[ref] for ref in refs]
182
- bleu_metrics(refs_twodim, translations)
183
 
184
  print("evaluating chrf", flush=True)
185
  chrf2_result = chrf.corpus_score(translations, refs_twodim)
186
-
187
  print("chrf2")
188
  print(chrf2_result)
189
  print("bleu")
190
- print(bleu_metrics.result().numpy())
191
- print("elapsed time")
192
  elapsed_time = end_time - start_time
193
- print(elapsed_time)
 
 
9
  import datetime
10
  import random
11
  import re
12
+ from sacrebleu.metrics import CHRF, BLEU
13
  import time
14
  import sys
15
  # from keras import ops
16
  #hyperparameters
17
  MAX_SEQUENCE_LENGTH = 64
18
+ eval_samples = 10
 
19
 
20
  transformer = keras.models.load_model('models_europarl/en_cs_translator_saved_20231209_0046.keras')
def read_files(path, lowercase = False):
    """Read a UTF-8 text file and return its lines as a list of strings.

    The file content is split on "\n"; the returned strings carry no line
    terminator.

    Args:
        path: Path of the text file to read.
        lowercase: When true, every returned line is lowercased.

    Returns:
        A list with one string per line of the file. An empty file yields
        an empty list.
    """
    with open(path, "r", encoding="utf-8") as f:
        dataset_split = f.read().split("\n")
    # A file that ends with a newline produces one empty trailing element.
    # Drop only that element: unconditionally slicing off the last item
    # would lose the final sentence of a file that has no trailing newline.
    if dataset_split and dataset_split[-1] == "":
        dataset_split.pop()
    # Optional case normalisation; off by default so the text is returned
    # exactly as stored.
    if lowercase:
        dataset_split = [line.lower() for line in dataset_split]
    return dataset_split
 
148
 
149
  test_en = read_files('datasets/europarl/test-cs-en.en')
150
  test_cs = read_files('datasets/europarl/test-cs-en.cs')
 
 
 
 
151
 
152
  chrf = CHRF()
153
+ bleu = BLEU()
154
  refs = test_cs[:eval_samples]
155
  translations = []
156
  start_time = time.time()
 
174
 
175
 
176
 
# --- Corpus-level scoring of the generated translations ---------------------
# sacrebleu's corpus_score(hypotheses, references) takes `references` as a
# list of reference *streams*: one inner list per reference set, each holding
# exactly one reference per hypothesis. There is a single reference per
# sentence here, so the correct layout is [refs]. The per-sentence layout
# [[ref] for ref in refs] is the transposed shape and must not be passed to
# sacrebleu.
refs_twodim = [refs]

print("evaluating bleu", flush=True)
bleu_result = bleu.corpus_score(translations, refs_twodim)

print("evaluating chrf", flush=True)
chrf2_result = chrf.corpus_score(translations, refs_twodim)

print("chrf2")
print(chrf2_result)
print("bleu")
print(bleu_result)

# Wall-clock duration between the start_time and end_time stamps taken
# earlier in the script.
elapsed_time = end_time - start_time
print("elapsed time")
print(elapsed_time)