berkayaltntas commited on
Commit
d528afd
·
verified ·
1 Parent(s): e4ef860

Training in progress, epoch 1

Browse files
config.json CHANGED
@@ -5,7 +5,7 @@
5
  "attention_probs_dropout_prob": 0.1,
6
  "classifier_dropout": null,
7
  "hidden_act": "gelu",
8
- "hidden_dropout_prob": 0.1,
9
  "hidden_size": 512,
10
  "initializer_range": 0.02,
11
  "intermediate_size": 2048,
 
5
  "attention_probs_dropout_prob": 0.1,
6
  "classifier_dropout": null,
7
  "hidden_act": "gelu",
8
+ "hidden_dropout_prob": 0.3817407923797774,
9
  "hidden_size": 512,
10
  "initializer_range": 0.02,
11
  "intermediate_size": 2048,
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:39971be058254b6fcaad09fd5a497f8f24dfc938a10ed4bf826735827a210d84
3
  size 165512728
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:363bac08b799d41f49c5dd03f3ea37d5862ca0691f193d70f68e52fc64b4e076
3
  size 165512728
run-0/checkpoint-156/config.json CHANGED
@@ -5,7 +5,7 @@
5
  "attention_probs_dropout_prob": 0.1,
6
  "classifier_dropout": null,
7
  "hidden_act": "gelu",
8
- "hidden_dropout_prob": 0.1891988684237177,
9
  "hidden_size": 512,
10
  "initializer_range": 0.02,
11
  "intermediate_size": 2048,
 
5
  "attention_probs_dropout_prob": 0.1,
6
  "classifier_dropout": null,
7
  "hidden_act": "gelu",
8
+ "hidden_dropout_prob": 0.3817407923797774,
9
  "hidden_size": 512,
10
  "initializer_range": 0.02,
11
  "intermediate_size": 2048,
run-0/checkpoint-156/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e523ef390379259e2d519e2d17198780a1cde58ae8503b3aba68131e7af118d0
3
  size 165512728
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:363bac08b799d41f49c5dd03f3ea37d5862ca0691f193d70f68e52fc64b4e076
3
  size 165512728
run-0/checkpoint-156/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f751b3beeba64cf089f013861f9f46c10c3a7a35cc7d1fbe1d9a54c6499cb452
3
  size 331108474
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cf9752a3ef956476a550a50f14fba0f487b5ab49a96516c41a197f5511dae44c
3
  size 331108474
run-0/checkpoint-156/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f4c504aa00e570dc675c82f1818a4568fe15a929890ef14b78d7e6d653a6ea55
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a29afde6d2148f5bed68a601216924711accd006df1a59fa931b8b5c921299c4
3
  size 1064
run-0/checkpoint-156/trainer_state.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "best_global_step": 156,
3
- "best_metric": 0.5090252707581228,
4
  "best_model_checkpoint": "bert_uncased_L-8_H-512_A-8-finetuned-rte-run_1/run-0/checkpoint-156",
5
  "epoch": 1.0,
6
  "eval_steps": 500,
@@ -11,18 +11,18 @@
11
  "log_history": [
12
  {
13
  "epoch": 1.0,
14
- "eval_accuracy": 0.5090252707581228,
15
- "eval_loss": 0.690215528011322,
16
- "eval_runtime": 0.9783,
17
- "eval_samples_per_second": 283.148,
18
- "eval_steps_per_second": 5.111,
19
  "step": 156
20
  }
21
  ],
22
  "logging_steps": 500,
23
- "max_steps": 624,
24
  "num_input_tokens_seen": 0,
25
- "num_train_epochs": 4,
26
  "save_steps": 500,
27
  "stateful_callbacks": {
28
  "TrainerControl": {
@@ -40,10 +40,7 @@
40
  "train_batch_size": 16,
41
  "trial_name": null,
42
  "trial_params": {
43
- "classifier_dropout": 0.1891988684237177,
44
- "learning_rate": 4.546022869296215e-06,
45
- "max_length": 256,
46
- "num_train_epochs": 4,
47
- "per_device_train_batch_size": 16
48
  }
49
  }
 
1
  {
2
  "best_global_step": 156,
3
+ "best_metric": 0.4981949458483754,
4
  "best_model_checkpoint": "bert_uncased_L-8_H-512_A-8-finetuned-rte-run_1/run-0/checkpoint-156",
5
  "epoch": 1.0,
6
  "eval_steps": 500,
 
11
  "log_history": [
12
  {
13
  "epoch": 1.0,
14
+ "eval_accuracy": 0.4981949458483754,
15
+ "eval_loss": 0.6968499422073364,
16
+ "eval_runtime": 0.9678,
17
+ "eval_samples_per_second": 286.225,
18
+ "eval_steps_per_second": 5.167,
19
  "step": 156
20
  }
21
  ],
22
  "logging_steps": 500,
23
+ "max_steps": 312,
24
  "num_input_tokens_seen": 0,
25
+ "num_train_epochs": 2,
26
  "save_steps": 500,
27
  "stateful_callbacks": {
28
  "TrainerControl": {
 
40
  "train_batch_size": 16,
41
  "trial_name": null,
42
  "trial_params": {
43
+ "classifier_dropout": 0.3817407923797774,
44
+ "learning_rate": 2e-05
 
 
 
45
  }
46
  }
run-0/checkpoint-156/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3c5f34da6ceb66aad0eb63c414662f43a60eed98b09cbecadb2699ba48e9f997
3
- size 5432
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:23ca9a06a8c3177f164972aa5dcabf054195fcaa9e67baf9fe5bbdd8dca15278
3
+ size 5496
run-0/checkpoint-312/config.json CHANGED
@@ -5,7 +5,7 @@
5
  "attention_probs_dropout_prob": 0.1,
6
  "classifier_dropout": null,
7
  "hidden_act": "gelu",
8
- "hidden_dropout_prob": 0.1891988684237177,
9
  "hidden_size": 512,
10
  "initializer_range": 0.02,
11
  "intermediate_size": 2048,
 
5
  "attention_probs_dropout_prob": 0.1,
6
  "classifier_dropout": null,
7
  "hidden_act": "gelu",
8
+ "hidden_dropout_prob": 0.3817407923797774,
9
  "hidden_size": 512,
10
  "initializer_range": 0.02,
11
  "intermediate_size": 2048,
run-0/checkpoint-312/trainer_state.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "best_global_step": 312,
3
- "best_metric": 0.5703971119133574,
4
  "best_model_checkpoint": "bert_uncased_L-8_H-512_A-8-finetuned-rte-run_1/run-0/checkpoint-312",
5
  "epoch": 2.0,
6
  "eval_steps": 500,
@@ -11,27 +11,27 @@
11
  "log_history": [
12
  {
13
  "epoch": 1.0,
14
- "eval_accuracy": 0.5090252707581228,
15
- "eval_loss": 0.690215528011322,
16
- "eval_runtime": 0.9783,
17
- "eval_samples_per_second": 283.148,
18
- "eval_steps_per_second": 5.111,
19
  "step": 156
20
  },
21
  {
22
  "epoch": 2.0,
23
- "eval_accuracy": 0.5703971119133574,
24
- "eval_loss": 0.6809557676315308,
25
- "eval_runtime": 0.9817,
26
- "eval_samples_per_second": 282.165,
27
- "eval_steps_per_second": 5.093,
28
  "step": 312
29
  }
30
  ],
31
  "logging_steps": 500,
32
- "max_steps": 624,
33
  "num_input_tokens_seen": 0,
34
- "num_train_epochs": 4,
35
  "save_steps": 500,
36
  "stateful_callbacks": {
37
  "TrainerControl": {
@@ -40,7 +40,7 @@
40
  "should_evaluate": false,
41
  "should_log": false,
42
  "should_save": true,
43
- "should_training_stop": false
44
  },
45
  "attributes": {}
46
  }
@@ -49,10 +49,7 @@
49
  "train_batch_size": 16,
50
  "trial_name": null,
51
  "trial_params": {
52
- "classifier_dropout": 0.1891988684237177,
53
- "learning_rate": 4.546022869296215e-06,
54
- "max_length": 256,
55
- "num_train_epochs": 4,
56
- "per_device_train_batch_size": 16
57
  }
58
  }
 
1
  {
2
  "best_global_step": 312,
3
+ "best_metric": 0.5487364620938628,
4
  "best_model_checkpoint": "bert_uncased_L-8_H-512_A-8-finetuned-rte-run_1/run-0/checkpoint-312",
5
  "epoch": 2.0,
6
  "eval_steps": 500,
 
11
  "log_history": [
12
  {
13
  "epoch": 1.0,
14
+ "eval_accuracy": 0.4981949458483754,
15
+ "eval_loss": 0.6968499422073364,
16
+ "eval_runtime": 0.9678,
17
+ "eval_samples_per_second": 286.225,
18
+ "eval_steps_per_second": 5.167,
19
  "step": 156
20
  },
21
  {
22
  "epoch": 2.0,
23
+ "eval_accuracy": 0.5487364620938628,
24
+ "eval_loss": 0.6850671768188477,
25
+ "eval_runtime": 0.9382,
26
+ "eval_samples_per_second": 295.242,
27
+ "eval_steps_per_second": 5.329,
28
  "step": 312
29
  }
30
  ],
31
  "logging_steps": 500,
32
+ "max_steps": 312,
33
  "num_input_tokens_seen": 0,
34
+ "num_train_epochs": 2,
35
  "save_steps": 500,
36
  "stateful_callbacks": {
37
  "TrainerControl": {
 
40
  "should_evaluate": false,
41
  "should_log": false,
42
  "should_save": true,
43
+ "should_training_stop": true
44
  },
45
  "attributes": {}
46
  }
 
49
  "train_batch_size": 16,
50
  "trial_name": null,
51
  "trial_params": {
52
+ "classifier_dropout": 0.3817407923797774,
53
+ "learning_rate": 2e-05
 
 
 
54
  }
55
  }
runs/Apr03_13-50-26_435476ce9a2d/events.out.tfevents.1743689409.435476ce9a2d.544.3 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:daa0f02a71e24cf003b9c885f440ab79464d985dce99c217652b93ab31a4c47a
3
- size 5358
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f59007e7572973eae7dc27eaee6dcafe5a5e7532addc0a33506805113e4bc42c
3
+ size 6035
runs/Apr03_13-50-26_435476ce9a2d/events.out.tfevents.1743690454.435476ce9a2d.544.4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:95996e35ee57ec423edbbf6d704f81372680d56ba3fe61b15266f3fa5eef2836
3
+ size 5356
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3a9555b45eb015cc8e38ce35ebfb7784a11973e590d59045ce2963614c8b84b7
3
  size 5496
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:23ca9a06a8c3177f164972aa5dcabf054195fcaa9e67baf9fe5bbdd8dca15278
3
  size 5496