rbelanec committed on
Commit
c1b9cdf
verified
1 Parent(s): 942b608

End of training

Browse files
README.md CHANGED
@@ -16,7 +16,7 @@ should probably proofread and complete it, then remove this comment. -->
16
 
17
  # train_2025-04-09-14-52-53
18
 
19
- This model is a fine-tuned version of [google/gemma-3-1b-it](https://huggingface.co/google/gemma-3-1b-it) on an unknown dataset.
20
 
21
  ## Model description
22
 
 
16
 
17
  # train_2025-04-09-14-52-53
18
 
19
+ This model is a fine-tuned version of [google/gemma-3-1b-it](https://huggingface.co/google/gemma-3-1b-it) on the glue_mnli_train dataset.
20
 
21
  ## Model description
22
 
all_results.json CHANGED
@@ -1,13 +1,27 @@
1
  {
2
  "epoch": 2.992,
 
3
  "eval_glue_mnli_eval_loss": 0.15313956141471863,
4
  "eval_glue_mnli_eval_runtime": 9.6591,
5
  "eval_glue_mnli_eval_samples_per_second": 103.529,
6
  "eval_glue_mnli_eval_steps_per_second": 12.941,
 
 
 
 
 
 
7
  "num_input_tokens_seen": 194480,
 
 
 
 
 
 
 
8
  "total_flos": 821972377374720.0,
9
- "train_loss": 0.07222598022030245,
10
- "train_runtime": 189.9362,
11
- "train_samples_per_second": 15.795,
12
- "train_steps_per_second": 0.979
13
  }
 
1
  {
2
  "epoch": 2.992,
3
+ "eval_bleu-4": 67.11986,
4
  "eval_glue_mnli_eval_loss": 0.15313956141471863,
5
  "eval_glue_mnli_eval_runtime": 9.6591,
6
  "eval_glue_mnli_eval_samples_per_second": 103.529,
7
  "eval_glue_mnli_eval_steps_per_second": 12.941,
8
+ "eval_rouge-1": 71.4,
9
+ "eval_rouge-2": 0.0,
10
+ "eval_rouge-l": 71.4,
11
+ "eval_runtime": 37.7648,
12
+ "eval_samples_per_second": 26.48,
13
+ "eval_steps_per_second": 3.31,
14
  "num_input_tokens_seen": 194480,
15
+ "predict_bleu-4": 67.8166649,
16
+ "predict_rouge-1": 72.3,
17
+ "predict_rouge-2": 0.0,
18
+ "predict_rouge-l": 72.3,
19
+ "predict_runtime": 36.8092,
20
+ "predict_samples_per_second": 27.167,
21
+ "predict_steps_per_second": 3.396,
22
  "total_flos": 821972377374720.0,
23
+ "train_loss": 0.0,
24
+ "train_runtime": 1.678,
25
+ "train_samples_per_second": 1787.823,
26
+ "train_steps_per_second": 110.845
27
  }
eval_results.json CHANGED
@@ -1,8 +1,11 @@
1
  {
2
  "epoch": 2.992,
3
- "eval_glue_mnli_eval_loss": 0.15313956141471863,
4
- "eval_glue_mnli_eval_runtime": 9.6591,
5
- "eval_glue_mnli_eval_samples_per_second": 103.529,
6
- "eval_glue_mnli_eval_steps_per_second": 12.941,
 
 
 
7
  "num_input_tokens_seen": 194480
8
  }
 
1
  {
2
  "epoch": 2.992,
3
+ "eval_bleu-4": 67.11986,
4
+ "eval_rouge-1": 71.4,
5
+ "eval_rouge-2": 0.0,
6
+ "eval_rouge-l": 71.4,
7
+ "eval_runtime": 37.7648,
8
+ "eval_samples_per_second": 26.48,
9
+ "eval_steps_per_second": 3.31,
10
  "num_input_tokens_seen": 194480
11
  }
generated_predictions.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
predict_results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "predict_bleu-4": 67.8166649,
3
+ "predict_rouge-1": 72.3,
4
+ "predict_rouge-2": 0.0,
5
+ "predict_rouge-l": 72.3,
6
+ "predict_runtime": 36.8092,
7
+ "predict_samples_per_second": 27.167,
8
+ "predict_steps_per_second": 3.396
9
+ }
tokenizer_config.json CHANGED
@@ -51340,7 +51340,7 @@
51340
  "image_token": "<image_soft_token>",
51341
  "model_max_length": 1000000000000000019884624838656,
51342
  "pad_token": "<pad>",
51343
- "padding_side": "right",
51344
  "processor_class": "Gemma3Processor",
51345
  "sp_model_kwargs": null,
51346
  "spaces_between_special_tokens": false,
 
51340
  "image_token": "<image_soft_token>",
51341
  "model_max_length": 1000000000000000019884624838656,
51342
  "pad_token": "<pad>",
51343
+ "padding_side": "left",
51344
  "processor_class": "Gemma3Processor",
51345
  "sp_model_kwargs": null,
51346
  "spaces_between_special_tokens": false,
train_results.json CHANGED
@@ -2,8 +2,8 @@
2
  "epoch": 2.992,
3
  "num_input_tokens_seen": 194480,
4
  "total_flos": 821972377374720.0,
5
- "train_loss": 0.07222598022030245,
6
- "train_runtime": 189.9362,
7
- "train_samples_per_second": 15.795,
8
- "train_steps_per_second": 0.979
9
  }
 
2
  "epoch": 2.992,
3
  "num_input_tokens_seen": 194480,
4
  "total_flos": 821972377374720.0,
5
+ "train_loss": 0.0,
6
+ "train_runtime": 1.678,
7
+ "train_samples_per_second": 1787.823,
8
+ "train_steps_per_second": 110.845
9
  }
trainer_state.json CHANGED
@@ -310,10 +310,10 @@
310
  "num_input_tokens_seen": 194480,
311
  "step": 186,
312
  "total_flos": 821972377374720.0,
313
- "train_loss": 0.07222598022030245,
314
- "train_runtime": 189.9362,
315
- "train_samples_per_second": 15.795,
316
- "train_steps_per_second": 0.979
317
  }
318
  ],
319
  "logging_steps": 5,
 
310
  "num_input_tokens_seen": 194480,
311
  "step": 186,
312
  "total_flos": 821972377374720.0,
313
+ "train_loss": 0.0,
314
+ "train_runtime": 1.678,
315
+ "train_samples_per_second": 1787.823,
316
+ "train_steps_per_second": 110.845
317
  }
318
  ],
319
  "logging_steps": 5,