End of training
Changed files:
- README.md +1 -1
- all_results.json +18 -4
- eval_results.json +7 -4
- generated_predictions.jsonl +0 -0
- predict_results.json +9 -0
- tokenizer_config.json +1 -1
- train_results.json +4 -4
- trainer_state.json +4 -4
README.md
CHANGED
@@ -16,7 +16,7 @@ should probably proofread and complete it, then remove this comment. -->
 
 # train_2025-04-09-14-52-53
 
-This model is a fine-tuned version of [google/gemma-3-1b-it](https://huggingface.co/google/gemma-3-1b-it) on
+This model is a fine-tuned version of [google/gemma-3-1b-it](https://huggingface.co/google/gemma-3-1b-it) on the glue_mnli_train dataset.
 
 ## Model description
 
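For reference, a minimal sketch of loading and querying a checkpoint like the one this README describes, using `transformers`. The repository id and the prompt template below are placeholders for illustration, not values taken from this repo.

```python
from transformers import AutoModelForCausalLM, AutoTokenizer

# Hypothetical repo id; substitute the actual path of this repository
# or a local checkpoint directory.
model_id = "your-username/train_2025-04-09-14-52-53"

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)

# Illustrative MNLI-style prompt; the exact template used during
# fine-tuning is not shown in this diff.
prompt = (
    "Premise: A man is playing a guitar.\n"
    "Hypothesis: Someone is playing an instrument.\n"
    "Answer:"
)
inputs = tokenizer(prompt, return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=5)
print(tokenizer.decode(outputs[0][inputs["input_ids"].shape[-1]:], skip_special_tokens=True))
```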
all_results.json
CHANGED
@@ -1,13 +1,27 @@
 {
     "epoch": 2.992,
+    "eval_bleu-4": 67.11986,
     "eval_glue_mnli_eval_loss": 0.15313956141471863,
     "eval_glue_mnli_eval_runtime": 9.6591,
     "eval_glue_mnli_eval_samples_per_second": 103.529,
     "eval_glue_mnli_eval_steps_per_second": 12.941,
+    "eval_rouge-1": 71.4,
+    "eval_rouge-2": 0.0,
+    "eval_rouge-l": 71.4,
+    "eval_runtime": 37.7648,
+    "eval_samples_per_second": 26.48,
+    "eval_steps_per_second": 3.31,
     "num_input_tokens_seen": 194480,
+    "predict_bleu-4": 67.8166649,
+    "predict_rouge-1": 72.3,
+    "predict_rouge-2": 0.0,
+    "predict_rouge-l": 72.3,
+    "predict_runtime": 36.8092,
+    "predict_samples_per_second": 27.167,
+    "predict_steps_per_second": 3.396,
     "total_flos": 821972377374720.0,
-    "train_loss": 0.
-    "train_runtime":
-    "train_samples_per_second":
-    "train_steps_per_second":
+    "train_loss": 0.0,
+    "train_runtime": 1.678,
+    "train_samples_per_second": 1787.823,
+    "train_steps_per_second": 110.845
 }
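A small sketch of reading these merged metrics back after training, assuming the file sits in the working directory under the name shown above.

```python
import json

with open("all_results.json") as f:
    results = json.load(f)

# Keys correspond to the entries in the diff above.
print(f"eval BLEU-4:    {results['eval_bleu-4']}")
print(f"eval ROUGE-L:   {results['eval_rouge-l']}")
print(f"predict BLEU-4: {results['predict_bleu-4']}")
print(f"tokens seen:    {results['num_input_tokens_seen']}")
```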
eval_results.json
CHANGED
@@ -1,8 +1,11 @@
 {
     "epoch": 2.992,
-    "
-    "
-    "
-    "
+    "eval_bleu-4": 67.11986,
+    "eval_rouge-1": 71.4,
+    "eval_rouge-2": 0.0,
+    "eval_rouge-l": 71.4,
+    "eval_runtime": 37.7648,
+    "eval_samples_per_second": 26.48,
+    "eval_steps_per_second": 3.31,
     "num_input_tokens_seen": 194480
 }
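The ROUGE-L values above are longest-common-subsequence based F-scores between each generated prediction and its reference, averaged over the eval set. A self-contained sketch of that computation for a single pair, for illustration only; this is not the exact implementation used to produce these numbers.

```python
def lcs_length(a: list[str], b: list[str]) -> int:
    # Dynamic-programming longest common subsequence over token lists.
    dp = [[0] * (len(b) + 1) for _ in range(len(a) + 1)]
    for i, x in enumerate(a, 1):
        for j, y in enumerate(b, 1):
            dp[i][j] = dp[i - 1][j - 1] + 1 if x == y else max(dp[i - 1][j], dp[i][j - 1])
    return dp[-1][-1]


def rouge_l_f1(prediction: str, reference: str) -> float:
    pred, ref = prediction.split(), reference.split()
    lcs = lcs_length(pred, ref)
    if lcs == 0:
        return 0.0
    precision, recall = lcs / len(pred), lcs / len(ref)
    return 2 * precision * recall / (precision + recall)


print(rouge_l_f1("entailment", "entailment"))  # 1.0 for an exact single-token match
```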
generated_predictions.jsonl
ADDED
The diff for this file is too large to render. See raw diff.
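generated_predictions.jsonl holds one JSON record per evaluated example. A sketch of scanning it is below; the field names `predict` and `label` are an assumption about the schema, so check one line of the actual file before relying on them.

```python
import json

records = []
with open("generated_predictions.jsonl") as f:
    for line in f:
        records.append(json.loads(line))

# Assumed field names; adjust if the file uses different keys.
exact = sum(r.get("predict") == r.get("label") for r in records)
print(f"{exact}/{len(records)} predictions match their labels exactly")
```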
predict_results.json
ADDED
@@ -0,0 +1,9 @@
+{
+    "predict_bleu-4": 67.8166649,
+    "predict_rouge-1": 72.3,
+    "predict_rouge-2": 0.0,
+    "predict_rouge-l": 72.3,
+    "predict_runtime": 36.8092,
+    "predict_samples_per_second": 27.167,
+    "predict_steps_per_second": 3.396
+}
tokenizer_config.json
CHANGED
@@ -51340,7 +51340,7 @@
   "image_token": "<image_soft_token>",
   "model_max_length": 1000000000000000019884624838656,
   "pad_token": "<pad>",
-  "padding_side": "
+  "padding_side": "left",
   "processor_class": "Gemma3Processor",
   "sp_model_kwargs": null,
   "spaces_between_special_tokens": false,
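The tokenizer change switches padding to the left, the usual setting for batched generation with a decoder-only model such as Gemma: pad tokens go before the prompt, so generation continues from the last real token in every row. A minimal sketch of applying the same setting at load time, using the base checkpoint named in the README:

```python
from transformers import AutoTokenizer

# Mirror the padding_side = "left" setting from tokenizer_config.json.
tokenizer = AutoTokenizer.from_pretrained("google/gemma-3-1b-it")
tokenizer.padding_side = "left"

batch = tokenizer(
    ["short prompt", "a noticeably longer prompt"],
    padding=True,
    return_tensors="pt",
)
# With left padding, the attention mask shows padding at the start of the
# shorter row rather than at the end.
print(batch["attention_mask"])
```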
train_results.json
CHANGED
@@ -2,8 +2,8 @@
     "epoch": 2.992,
     "num_input_tokens_seen": 194480,
     "total_flos": 821972377374720.0,
-    "train_loss": 0.
-    "train_runtime":
-    "train_samples_per_second":
-    "train_steps_per_second":
+    "train_loss": 0.0,
+    "train_runtime": 1.678,
+    "train_samples_per_second": 1787.823,
+    "train_steps_per_second": 110.845
 }
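The new throughput figures are internally consistent with the step count recorded in trainer_state.json below; a quick check:

```python
# Values from train_results.json above.
train_runtime = 1.678             # seconds
steps_per_second = 110.845
samples_per_second = 1787.823

print(round(train_runtime * steps_per_second))    # ~186, matches "step": 186 below
print(round(train_runtime * samples_per_second))  # ~3000 samples covered by the reported runtime
```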
trainer_state.json
CHANGED
@@ -310,10 +310,10 @@
       "num_input_tokens_seen": 194480,
       "step": 186,
       "total_flos": 821972377374720.0,
-      "train_loss": 0.
-      "train_runtime":
-      "train_samples_per_second":
-      "train_steps_per_second":
+      "train_loss": 0.0,
+      "train_runtime": 1.678,
+      "train_samples_per_second": 1787.823,
+      "train_steps_per_second": 110.845
     }
   ],
   "logging_steps": 5,