seanfarrell committed on
Commit
d1ed40f
·
verified ·
1 Parent(s): 18822f2

update model with newer version

Browse files
Files changed (6) hide show
  1. model.safetensors +1 -1
  2. optimizer.pt +1 -1
  3. rng_state.pth +1 -1
  4. scheduler.pt +1 -1
  5. trainer_state.json +80 -84
  6. training_args.bin +1 -1
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4d8a1fc823d3ba0d45afc06b1105997300ad38030351b439d85610fa9fa38a75
3
  size 430935892
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a184558aeae2acda43c0b7d8660f3a40620a1e111e2788bf17507bc208a251a9
3
  size 430935892
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:eed95c6f0459164abd29b411040228bdd2d3ab9ef8949ba8423fab4ddfecad90
3
  size 861991482
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:753a905dc66bdc951ff46dbc455dcef5a9601f90d54b21507aed3abb36edf4a1
3
  size 861991482
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4679e8a1fa9d31242916d045260107779d3bf18ad04f68edc0e2a86739d10fbd
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:712caa512e2bd3a13bbc3abf2f8c256aa41b420c1f92a596df76f55140a4898f
3
  size 14244
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:73ec52a655b60bd9b36d5e598a010f1db1893ca2657401a77e3d62edf772ae6f
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:49ca8cfba8e912830d0bbc9af499faa3ae8fc7cef2c871274f7a0984db09f49f
3
  size 1064
trainer_state.json CHANGED
@@ -1,125 +1,121 @@
1
  {
2
- "best_global_step": 1376,
3
- "best_metric": 0.008029412478208542,
4
- "best_model_checkpoint": "projects/PetBERT_annonymisation/data/case_sensitive/model/checkpoint-1376",
5
- "epoch": 5.0,
6
  "eval_steps": 500,
7
- "global_step": 3440,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
11
  "log_history": [
12
  {
13
  "epoch": 0,
14
- "eval_f1": 0.010973861448813302,
15
- "eval_loss": 2.4063162803649902,
16
- "eval_precision": 0.09108980806958722,
17
- "eval_recall": 0.10473207318294019,
18
- "eval_runtime": 23.7722,
19
- "eval_samples_per_second": 139.407,
20
- "eval_steps_per_second": 4.375,
21
  "step": 0
22
  },
23
  {
24
- "epoch": 0.7267441860465116,
25
- "grad_norm": 0.2666139304637909,
26
- "learning_rate": 4.996373546511628e-05,
27
- "loss": 0.0518,
28
  "step": 500
29
  },
30
  {
31
  "epoch": 1.0,
32
- "eval_f1": 0.9198586422350306,
33
- "eval_loss": 0.010010140016674995,
34
- "eval_precision": 0.9136306910527949,
35
- "eval_recall": 0.9281047546302535,
36
- "eval_runtime": 24.8489,
37
- "eval_samples_per_second": 133.366,
38
- "eval_steps_per_second": 4.185,
39
- "step": 688
40
  },
41
  {
42
- "epoch": 1.4534883720930232,
43
- "grad_norm": 0.11741874366998672,
44
- "learning_rate": 4.992739825581396e-05,
45
- "loss": 0.0036,
46
  "step": 1000
47
  },
48
  {
49
- "epoch": 2.0,
50
- "eval_f1": 0.9300254346144702,
51
- "eval_loss": 0.008029412478208542,
52
- "eval_precision": 0.9130114164496201,
53
- "eval_recall": 0.9489397415434179,
54
- "eval_runtime": 25.1412,
55
- "eval_samples_per_second": 131.815,
56
- "eval_steps_per_second": 4.137,
57
- "step": 1376
58
- },
59
- {
60
- "epoch": 2.1802325581395348,
61
- "grad_norm": 0.3377048373222351,
62
- "learning_rate": 4.989106104651163e-05,
63
  "loss": 0.0026,
64
  "step": 1500
65
  },
66
  {
67
- "epoch": 2.9069767441860463,
68
- "grad_norm": 0.2929118573665619,
69
- "learning_rate": 4.985472383720931e-05,
70
- "loss": 0.0011,
71
- "step": 2000
 
 
 
 
72
  },
73
  {
74
- "epoch": 3.0,
75
- "eval_f1": 0.9402795283929368,
76
- "eval_loss": 0.01124291867017746,
77
- "eval_precision": 0.9422754585534239,
78
- "eval_recall": 0.9386206401984958,
79
- "eval_runtime": 24.8815,
80
- "eval_samples_per_second": 133.191,
81
- "eval_steps_per_second": 4.18,
82
- "step": 2064
83
  },
84
  {
85
- "epoch": 3.633720930232558,
86
- "grad_norm": 0.002858501160517335,
87
- "learning_rate": 4.981838662790698e-05,
88
- "loss": 0.0012,
89
  "step": 2500
90
  },
91
  {
92
- "epoch": 4.0,
93
- "eval_f1": 0.9321076846329125,
94
- "eval_loss": 0.011350538581609726,
95
- "eval_precision": 0.9585579423470411,
96
- "eval_recall": 0.9092514508781232,
97
- "eval_runtime": 24.8844,
98
- "eval_samples_per_second": 133.176,
99
- "eval_steps_per_second": 4.179,
100
- "step": 2752
101
  },
102
  {
103
- "epoch": 4.3604651162790695,
104
- "grad_norm": 0.3254820704460144,
105
- "learning_rate": 4.978204941860465e-05,
106
- "loss": 0.001,
107
  "step": 3000
108
  },
109
  {
110
- "epoch": 5.0,
111
- "eval_f1": 0.9375959937426187,
112
- "eval_loss": 0.012251886539161205,
113
- "eval_precision": 0.9347887957568951,
114
- "eval_recall": 0.9407576824824448,
115
- "eval_runtime": 24.848,
116
- "eval_samples_per_second": 133.371,
117
- "eval_steps_per_second": 4.185,
118
- "step": 3440
 
 
 
 
 
 
 
119
  }
120
  ],
121
  "logging_steps": 500,
122
- "max_steps": 688000,
123
  "num_input_tokens_seen": 0,
124
  "num_train_epochs": 1000,
125
  "save_steps": 500,
@@ -144,7 +140,7 @@
144
  "attributes": {}
145
  }
146
  },
147
- "total_flos": 2.874236880509952e+16,
148
  "train_batch_size": 32,
149
  "trial_name": null,
150
  "trial_params": null
 
1
  {
2
+ "best_global_step": 899,
3
+ "best_metric": 0.009147428907454014,
4
+ "best_model_checkpoint": "projects/PetBERT_annonymisation/data/augment/checkpoint-899",
5
+ "epoch": 4.0,
6
  "eval_steps": 500,
7
+ "global_step": 3596,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
11
  "log_history": [
12
  {
13
  "epoch": 0,
14
+ "eval_f1": 0.012204053693834897,
15
+ "eval_loss": 2.3885538578033447,
16
+ "eval_precision": 0.08893940326175621,
17
+ "eval_recall": 0.08310981045257043,
18
+ "eval_runtime": 31.2912,
19
+ "eval_samples_per_second": 139.592,
20
+ "eval_steps_per_second": 4.378,
21
  "step": 0
22
  },
23
  {
24
+ "epoch": 0.5561735261401557,
25
+ "grad_norm": 0.35977280139923096,
26
+ "learning_rate": 4.997224694104561e-05,
27
+ "loss": 0.0524,
28
  "step": 500
29
  },
30
  {
31
  "epoch": 1.0,
32
+ "eval_f1": 0.9528093236906681,
33
+ "eval_loss": 0.009147428907454014,
34
+ "eval_precision": 0.9461252678130232,
35
+ "eval_recall": 0.9597947571160605,
36
+ "eval_runtime": 32.73,
37
+ "eval_samples_per_second": 133.456,
38
+ "eval_steps_per_second": 4.186,
39
+ "step": 899
40
  },
41
  {
42
+ "epoch": 1.1123470522803114,
43
+ "grad_norm": 0.11930424720048904,
44
+ "learning_rate": 4.99444382647386e-05,
45
+ "loss": 0.0043,
46
  "step": 1000
47
  },
48
  {
49
+ "epoch": 1.668520578420467,
50
+ "grad_norm": 0.12083794176578522,
51
+ "learning_rate": 4.991662958843159e-05,
 
 
 
 
 
 
 
 
 
 
 
52
  "loss": 0.0026,
53
  "step": 1500
54
  },
55
  {
56
+ "epoch": 2.0,
57
+ "eval_f1": 0.9591844285588763,
58
+ "eval_loss": 0.009312924928963184,
59
+ "eval_precision": 0.9530780078627331,
60
+ "eval_recall": 0.9657284498000394,
61
+ "eval_runtime": 32.7085,
62
+ "eval_samples_per_second": 133.543,
63
+ "eval_steps_per_second": 4.189,
64
+ "step": 1798
65
  },
66
  {
67
+ "epoch": 2.2246941045606228,
68
+ "grad_norm": 0.012293193489313126,
69
+ "learning_rate": 4.988882091212459e-05,
70
+ "loss": 0.0015,
71
+ "step": 2000
 
 
 
 
72
  },
73
  {
74
+ "epoch": 2.7808676307007785,
75
+ "grad_norm": 0.005511277820914984,
76
+ "learning_rate": 4.986101223581758e-05,
77
+ "loss": 0.0014,
78
  "step": 2500
79
  },
80
  {
81
+ "epoch": 3.0,
82
+ "eval_f1": 0.9473572974918704,
83
+ "eval_loss": 0.013561395928263664,
84
+ "eval_precision": 0.9406871215514148,
85
+ "eval_recall": 0.9544992839810097,
86
+ "eval_runtime": 32.7338,
87
+ "eval_samples_per_second": 133.44,
88
+ "eval_steps_per_second": 4.185,
89
+ "step": 2697
90
  },
91
  {
92
+ "epoch": 3.337041156840934,
93
+ "grad_norm": 0.03908713161945343,
94
+ "learning_rate": 4.983320355951057e-05,
95
+ "loss": 0.0014,
96
  "step": 3000
97
  },
98
  {
99
+ "epoch": 3.89321468298109,
100
+ "grad_norm": 0.009426549077033997,
101
+ "learning_rate": 4.980539488320356e-05,
102
+ "loss": 0.0016,
103
+ "step": 3500
104
+ },
105
+ {
106
+ "epoch": 4.0,
107
+ "eval_f1": 0.953040195996418,
108
+ "eval_loss": 0.010148942470550537,
109
+ "eval_precision": 0.9406877737118386,
110
+ "eval_recall": 0.9674011453268957,
111
+ "eval_runtime": 32.788,
112
+ "eval_samples_per_second": 133.219,
113
+ "eval_steps_per_second": 4.178,
114
+ "step": 3596
115
  }
116
  ],
117
  "logging_steps": 500,
118
+ "max_steps": 899000,
119
  "num_input_tokens_seen": 0,
120
  "num_train_epochs": 1000,
121
  "save_steps": 500,
 
140
  "attributes": {}
141
  }
142
  },
143
+ "total_flos": 3.006829642867507e+16,
144
  "train_batch_size": 32,
145
  "trial_name": null,
146
  "trial_params": null
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f7f8df01d5a1682106eccdf01898b00019477ad3b989dbc3741b897bbcf18bbf
3
  size 5304
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3b027a28b72c517e30a4a077c26526840f9f8e189cb0dc1de6469ad0645aeeb7
3
  size 5304