Tapendra committed on
Commit
6339bdb
·
verified ·
1 Parent(s): 0cf055e

Upload trainer_state.json with huggingface_hub

Browse files
Files changed (1) hide show
  1. trainer_state.json +160 -0
trainer_state.json ADDED
@@ -0,0 +1,160 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": null,
3
+ "best_metric": null,
4
+ "best_model_checkpoint": null,
5
+ "epoch": 15.0,
6
+ "eval_steps": 500,
7
+ "global_step": 18390,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.8156606851549756,
14
+ "grad_norm": 2.7835583686828613,
15
+ "learning_rate": 0.00019968889417401253,
16
+ "loss": 0.6438,
17
+ "step": 1000
18
+ },
19
+ {
20
+ "epoch": 1.631321370309951,
21
+ "grad_norm": 0.3261709213256836,
22
+ "learning_rate": 0.00019676585418772425,
23
+ "loss": 0.2388,
24
+ "step": 2000
25
+ },
26
+ {
27
+ "epoch": 2.4469820554649266,
28
+ "grad_norm": 2.9635205268859863,
29
+ "learning_rate": 0.00019084912501825553,
30
+ "loss": 0.2032,
31
+ "step": 3000
32
+ },
33
+ {
34
+ "epoch": 3.262642740619902,
35
+ "grad_norm": 2.162414312362671,
36
+ "learning_rate": 0.00018212175520336934,
37
+ "loss": 0.163,
38
+ "step": 4000
39
+ },
40
+ {
41
+ "epoch": 4.078303425774878,
42
+ "grad_norm": 3.4721012115478516,
43
+ "learning_rate": 0.00017085374734710157,
44
+ "loss": 0.1519,
45
+ "step": 5000
46
+ },
47
+ {
48
+ "epoch": 4.893964110929853,
49
+ "grad_norm": 1.8501887321472168,
50
+ "learning_rate": 0.0001573937049265616,
51
+ "loss": 0.13,
52
+ "step": 6000
53
+ },
54
+ {
55
+ "epoch": 5.709624796084829,
56
+ "grad_norm": 3.3048102855682373,
57
+ "learning_rate": 0.00014215804738782126,
58
+ "loss": 0.1186,
59
+ "step": 7000
60
+ },
61
+ {
62
+ "epoch": 6.525285481239804,
63
+ "grad_norm": 2.223738193511963,
64
+ "learning_rate": 0.00012561812718836913,
65
+ "loss": 0.1046,
66
+ "step": 8000
67
+ },
68
+ {
69
+ "epoch": 7.3409461663947795,
70
+ "grad_norm": 0.2893391251564026,
71
+ "learning_rate": 0.00010828564735203954,
72
+ "loss": 0.0957,
73
+ "step": 9000
74
+ },
75
+ {
76
+ "epoch": 8.156606851549755,
77
+ "grad_norm": 0.24986231327056885,
78
+ "learning_rate": 9.069683068014265e-05,
79
+ "loss": 0.0912,
80
+ "step": 10000
81
+ },
82
+ {
83
+ "epoch": 8.97226753670473,
84
+ "grad_norm": 0.6728571653366089,
85
+ "learning_rate": 7.339583038310173e-05,
86
+ "loss": 0.08,
87
+ "step": 11000
88
+ },
89
+ {
90
+ "epoch": 9.787928221859707,
91
+ "grad_norm": 0.3318624794483185,
92
+ "learning_rate": 5.69178953654216e-05,
93
+ "loss": 0.0722,
94
+ "step": 12000
95
+ },
96
+ {
97
+ "epoch": 10.603588907014682,
98
+ "grad_norm": 0.1652187556028366,
99
+ "learning_rate": 4.177281098721372e-05,
100
+ "loss": 0.07,
101
+ "step": 13000
102
+ },
103
+ {
104
+ "epoch": 11.419249592169658,
105
+ "grad_norm": 0.13997943699359894,
106
+ "learning_rate": 2.8429127602959905e-05,
107
+ "loss": 0.0635,
108
+ "step": 14000
109
+ },
110
+ {
111
+ "epoch": 12.234910277324634,
112
+ "grad_norm": 0.17078348994255066,
113
+ "learning_rate": 1.729966480637476e-05,
114
+ "loss": 0.0607,
115
+ "step": 15000
116
+ },
117
+ {
118
+ "epoch": 13.05057096247961,
119
+ "grad_norm": 0.10830472409725189,
120
+ "learning_rate": 8.728739843127509e-06,
121
+ "loss": 0.0599,
122
+ "step": 16000
123
+ },
124
+ {
125
+ "epoch": 13.866231647634583,
126
+ "grad_norm": 0.13888326287269592,
127
+ "learning_rate": 2.9815153118197825e-06,
128
+ "loss": 0.0566,
129
+ "step": 17000
130
+ },
131
+ {
132
+ "epoch": 14.681892332789559,
133
+ "grad_norm": 0.138445645570755,
134
+ "learning_rate": 2.3579570823278885e-07,
135
+ "loss": 0.0556,
136
+ "step": 18000
137
+ }
138
+ ],
139
+ "logging_steps": 1000,
140
+ "max_steps": 18390,
141
+ "num_input_tokens_seen": 0,
142
+ "num_train_epochs": 15,
143
+ "save_steps": 6000,
144
+ "stateful_callbacks": {
145
+ "TrainerControl": {
146
+ "args": {
147
+ "should_epoch_stop": false,
148
+ "should_evaluate": false,
149
+ "should_log": false,
150
+ "should_save": true,
151
+ "should_training_stop": true
152
+ },
153
+ "attributes": {}
154
+ }
155
+ },
156
+ "total_flos": 7.5627004461372e+16,
157
+ "train_batch_size": 1,
158
+ "trial_name": null,
159
+ "trial_params": null
160
+ }