kimlong22 commited on
Commit
476e599
·
verified ·
1 Parent(s): a8182d3

Training in progress, epoch 1, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:89c03989f75d0aa4bd09aa494db2abf9fdb0699814577f09a2b8630feeff3136
3
  size 1115268200
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d08f5b2079b12d0ebc378b7ba37719cef438a77d0c7ff748f868686e1b3dec97
3
  size 1115268200
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:122128e68accfbfaa61106e4a08a7177da6af52b3b3da0e1b17c0c52f40ef1dc
3
  size 2230655994
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:092349cbd12ca364369f5254eb4088d1727ae8d73317e0a9eb7f6a8a952e8b0e
3
  size 2230655994
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:21c2e0a4cab570225070feda692b2162a9b40fcabdd5637926e2d942260865f5
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:719d6c8ad98e64ac546193b08135e5ed391384ea1aee1b3a87bf60d493661041
3
  size 15984
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:719551c691645b80be6d64346b6b37cbcf1c3bcf115735210b293531a14241f1
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:67b0732bd2e1d48df8ec9b79fbeaa48f18a2d97b40cc9d445eba425f193f78c1
3
  size 15984
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e1abbaa3e61df15650b0b8d5e31d1765de2d084229bfb2c0930169fd3f38b650
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bb77a464f5f5dde250d2da9dbae68cde20b2e27488ac4cb6eb0fd199c937e351
3
  size 15984
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:697b606be1130775bd88ffdee4a1649261f6a26edeb2e1eb8717d55a9b3c7246
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d1404f64094b86b7230b1f67e3aa381b83b84932923cdc1093b67ee7107422cf
3
  size 15984
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2308ce4ffc571a690001de8ba0878d90739f84e4e4454d446e6a0d99a5cf7725
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5fad6afabe76ea52242f10034f8797c23d3674c8c98c091ce9874f58404da396
3
  size 15984
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a9979034fcf548b3e23fd45a62872fd7ef6105dc04e4dd23df2112f398552a57
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:049fdec0553048dd05a5ad06b02a552f63c94725a91eeac5445c5486fc909146
3
  size 15984
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:449907656080f489d0957ade04723380fc9c1022cbe70db248bd76c2479b6cb3
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a10fbc2c96490be41edb285328e8594b3d7a5b655870fbd35c0dc5dee8d29b72
3
  size 15984
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4d07c008bd2cdb25b485adf5e638dba6920e10d6fcd21f404a65d134bb5f6bf5
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:915fe561107f56ccd5cd8e3bc1f398b677c030eb325f5a04f8d80a8df56dbe2e
3
  size 15984
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cd329b76d03f410648ef363897eabe18ec0afb682154549bd5a0ea9c20f875eb
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9d55733a134083157b7e4bd9816117f4f58ac26f258c32f14bcd22330f75627d
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,38 +1,199 @@
1
  {
2
- "best_metric": 0.5555555524691358,
3
- "best_model_checkpoint": "checkpoint/cross_encoder_20250522_full_data/checkpoint-1",
4
  "epoch": 1.0,
5
  "eval_steps": 500,
6
- "global_step": 1,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 1.0,
13
- "grad_norm": 3.537013292312622,
14
- "learning_rate": 1e-06,
15
- "loss": 0.7455,
16
  "step": 1
17
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
  {
19
  "epoch": 1.0,
20
- "eval_f2": 0.5555555524691358,
21
- "eval_loss": 0.6809892654418945,
22
- "eval_precision": 0.2,
23
  "eval_recall": 1.0,
24
- "eval_runtime": 0.1671,
25
- "eval_samples_per_second": 59.844,
26
- "eval_steps_per_second": 5.984,
27
- "step": 1
28
  }
29
  ],
30
  "logging_steps": 200,
31
- "max_steps": 2,
32
  "num_input_tokens_seen": 0,
33
- "num_train_epochs": 2,
34
  "save_steps": 500,
35
- "total_flos": 16839409139712.0,
36
  "train_batch_size": 8,
37
  "trial_name": null,
38
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.49999999700000003,
3
+ "best_model_checkpoint": "checkpoint/cross_encoder_20250522_full_data/checkpoint-4633",
4
  "epoch": 1.0,
5
  "eval_steps": 500,
6
+ "global_step": 4633,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.0,
13
+ "grad_norm": 2.436934471130371,
14
+ "learning_rate": 4.315925766076824e-12,
15
+ "loss": 0.7375,
16
  "step": 1
17
  },
18
+ {
19
+ "epoch": 0.04,
20
+ "grad_norm": 1.8707695007324219,
21
+ "learning_rate": 8.631851532153648e-10,
22
+ "loss": 0.7326,
23
+ "step": 200
24
+ },
25
+ {
26
+ "epoch": 0.09,
27
+ "grad_norm": 1.4507315158843994,
28
+ "learning_rate": 1.7263703064307296e-09,
29
+ "loss": 0.734,
30
+ "step": 400
31
+ },
32
+ {
33
+ "epoch": 0.13,
34
+ "grad_norm": 4.8174872398376465,
35
+ "learning_rate": 2.589555459646094e-09,
36
+ "loss": 0.731,
37
+ "step": 600
38
+ },
39
+ {
40
+ "epoch": 0.17,
41
+ "grad_norm": 0.8691744208335876,
42
+ "learning_rate": 3.452740612861459e-09,
43
+ "loss": 0.7356,
44
+ "step": 800
45
+ },
46
+ {
47
+ "epoch": 0.22,
48
+ "grad_norm": 1.7719039916992188,
49
+ "learning_rate": 4.315925766076823e-09,
50
+ "loss": 0.7286,
51
+ "step": 1000
52
+ },
53
+ {
54
+ "epoch": 0.26,
55
+ "grad_norm": 2.3143906593322754,
56
+ "learning_rate": 5.179110919292188e-09,
57
+ "loss": 0.7302,
58
+ "step": 1200
59
+ },
60
+ {
61
+ "epoch": 0.3,
62
+ "grad_norm": 2.4967010021209717,
63
+ "learning_rate": 6.042296072507553e-09,
64
+ "loss": 0.7318,
65
+ "step": 1400
66
+ },
67
+ {
68
+ "epoch": 0.35,
69
+ "grad_norm": 3.2115557193756104,
70
+ "learning_rate": 6.905481225722918e-09,
71
+ "loss": 0.7322,
72
+ "step": 1600
73
+ },
74
+ {
75
+ "epoch": 0.39,
76
+ "grad_norm": 2.6317012310028076,
77
+ "learning_rate": 7.768666378938282e-09,
78
+ "loss": 0.7288,
79
+ "step": 1800
80
+ },
81
+ {
82
+ "epoch": 0.43,
83
+ "grad_norm": 1.578616738319397,
84
+ "learning_rate": 8.631851532153647e-09,
85
+ "loss": 0.727,
86
+ "step": 2000
87
+ },
88
+ {
89
+ "epoch": 0.47,
90
+ "grad_norm": 2.6788175106048584,
91
+ "learning_rate": 9.495036685369011e-09,
92
+ "loss": 0.7236,
93
+ "step": 2200
94
+ },
95
+ {
96
+ "epoch": 0.52,
97
+ "grad_norm": 1.1162360906600952,
98
+ "learning_rate": 9.99960892371536e-09,
99
+ "loss": 0.7234,
100
+ "step": 2400
101
+ },
102
+ {
103
+ "epoch": 0.56,
104
+ "grad_norm": 3.412252426147461,
105
+ "learning_rate": 9.995454119562455e-09,
106
+ "loss": 0.7273,
107
+ "step": 2600
108
+ },
109
+ {
110
+ "epoch": 0.6,
111
+ "grad_norm": 2.049833297729492,
112
+ "learning_rate": 9.986762270880315e-09,
113
+ "loss": 0.7216,
114
+ "step": 2800
115
+ },
116
+ {
117
+ "epoch": 0.65,
118
+ "grad_norm": 2.14269757270813,
119
+ "learning_rate": 9.973541271907098e-09,
120
+ "loss": 0.7236,
121
+ "step": 3000
122
+ },
123
+ {
124
+ "epoch": 0.69,
125
+ "grad_norm": 3.2950925827026367,
126
+ "learning_rate": 9.955803130412195e-09,
127
+ "loss": 0.7257,
128
+ "step": 3200
129
+ },
130
+ {
131
+ "epoch": 0.73,
132
+ "grad_norm": 1.6116753816604614,
133
+ "learning_rate": 9.933563956790353e-09,
134
+ "loss": 0.7202,
135
+ "step": 3400
136
+ },
137
+ {
138
+ "epoch": 0.78,
139
+ "grad_norm": 2.2606937885284424,
140
+ "learning_rate": 9.906843949429669e-09,
141
+ "loss": 0.718,
142
+ "step": 3600
143
+ },
144
+ {
145
+ "epoch": 0.82,
146
+ "grad_norm": 3.41159725189209,
147
+ "learning_rate": 9.875667376366706e-09,
148
+ "loss": 0.717,
149
+ "step": 3800
150
+ },
151
+ {
152
+ "epoch": 0.86,
153
+ "grad_norm": 2.007009983062744,
154
+ "learning_rate": 9.840062553245418e-09,
155
+ "loss": 0.7156,
156
+ "step": 4000
157
+ },
158
+ {
159
+ "epoch": 0.91,
160
+ "grad_norm": 2.2354674339294434,
161
+ "learning_rate": 9.800061817599912e-09,
162
+ "loss": 0.7138,
163
+ "step": 4200
164
+ },
165
+ {
166
+ "epoch": 0.95,
167
+ "grad_norm": 3.80654239654541,
168
+ "learning_rate": 9.755701499484371e-09,
169
+ "loss": 0.7144,
170
+ "step": 4400
171
+ },
172
+ {
173
+ "epoch": 0.99,
174
+ "grad_norm": 5.391232490539551,
175
+ "learning_rate": 9.707021888476834e-09,
176
+ "loss": 0.7088,
177
+ "step": 4600
178
+ },
179
  {
180
  "epoch": 1.0,
181
+ "eval_f2": 0.49999999700000003,
182
+ "eval_loss": 0.7074111700057983,
183
+ "eval_precision": 0.16666666666666666,
184
  "eval_recall": 1.0,
185
+ "eval_runtime": 1108.1527,
186
+ "eval_samples_per_second": 108.462,
187
+ "eval_steps_per_second": 1.695,
188
+ "step": 4633
189
  }
190
  ],
191
  "logging_steps": 200,
192
+ "max_steps": 23165,
193
  "num_input_tokens_seen": 0,
194
+ "num_train_epochs": 5,
195
  "save_steps": 500,
196
+ "total_flos": 7.801698556418458e+16,
197
  "train_batch_size": 8,
198
  "trial_name": null,
199
  "trial_params": null
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8c35a0a052756eed7c3a8019317fe0fdeda73d19c1f4811fdf8efcef22ec1a7d
3
  size 5048
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8f6928b07215cfaf54c32601499d6f401be4da6b2575801bc58eba1077b9208c
3
  size 5048