End of training
- all_results.json +8 -0
- preprocessor_config.json +28 -0
- special_tokens_map.json +7 -0
- tokenizer.json +0 -0
- tokenizer_config.json +58 -0
- train_results.json +8 -0
- trainer_state.json +735 -0
- vocab.txt +0 -0
all_results.json
ADDED
@@ -0,0 +1,8 @@
+{
+    "epoch": 9.998542037907015,
+    "total_flos": 1.461291118888168e+18,
+    "train_loss": 1.202193304075487,
+    "train_runtime": 14493.7066,
+    "train_samples_per_second": 255.536,
+    "train_steps_per_second": 2.129
+}
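For quick inspection, the summary above can be read back with the standard library; a minimal sketch, assuming `all_results.json` is in the current working directory:

```python
import json

# Load the end-of-training summary written by the Trainer.
with open("all_results.json") as f:
    results = json.load(f)

# Roughly recover the per-epoch dataset size from runtime * throughput / epochs.
samples_per_epoch = (
    results["train_runtime"] * results["train_samples_per_second"] / results["epoch"]
)
print(f"final train_loss={results['train_loss']:.4f}, "
      f"~{samples_per_epoch:,.0f} samples per epoch")
```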
preprocessor_config.json
ADDED
@@ -0,0 +1,28 @@
+{
+  "crop_size": {
+    "height": 224,
+    "width": 224
+  },
+  "do_center_crop": false,
+  "do_convert_rgb": true,
+  "do_normalize": true,
+  "do_rescale": true,
+  "do_resize": true,
+  "image_mean": [
+    0.48145466,
+    0.4578275,
+    0.40821073
+  ],
+  "image_processor_type": "ChineseCLIPImageProcessor",
+  "image_std": [
+    0.26862954,
+    0.26130258,
+    0.27577711
+  ],
+  "resample": 3,
+  "rescale_factor": 0.00392156862745098,
+  "size": {
+    "height": 224,
+    "width": 224
+  }
+}
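This is the standard Chinese-CLIP image pipeline: resize to 224x224 with bicubic resampling, rescale by 1/255, normalize with the CLIP mean/std above, no center crop. A minimal loading sketch with `transformers`, assuming the exported files live in a local directory called `./checkpoint` (the actual repo id is not shown here):

```python
from PIL import Image
from transformers import AutoImageProcessor

# Loads preprocessor_config.json from the checkpoint directory (path is assumed).
processor = AutoImageProcessor.from_pretrained("./checkpoint")

# Produces pixel_values of shape (1, 3, 224, 224): resized, rescaled by 1/255
# and normalized with the image_mean / image_std listed above.
image = Image.open("example.jpg")  # hypothetical sample image
pixel_values = processor(images=image, return_tensors="pt").pixel_values
print(pixel_values.shape)
```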
special_tokens_map.json
ADDED
@@ -0,0 +1,7 @@
+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}
tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
tokenizer_config.json
ADDED
@@ -0,0 +1,58 @@
+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_basic_tokenize": true,
+  "do_lower_case": true,
+  "extra_special_tokens": {},
+  "mask_token": "[MASK]",
+  "model_max_length": 1000000000000000019884624838656,
+  "never_split": null,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "BertTokenizer",
+  "unk_token": "[UNK]"
+}
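The text side is a plain WordPiece `BertTokenizer` with lower-casing and Chinese-character splitting enabled ([CLS] = 101, [SEP] = 102, [PAD] = 0). A minimal loading sketch, again assuming a local `./checkpoint` directory:

```python
from transformers import AutoTokenizer

# Loads tokenizer_config.json, vocab.txt and special_tokens_map.json (path is assumed).
tokenizer = AutoTokenizer.from_pretrained("./checkpoint")

encoded = tokenizer("一只可爱的小猫", return_tensors="pt")
print(encoded.input_ids)  # starts with [CLS] (101) and ends with [SEP] (102)
print(tokenizer.convert_ids_to_tokens(encoded.input_ids[0].tolist()))
```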
train_results.json
ADDED
@@ -0,0 +1,8 @@
+{
+    "epoch": 9.998542037907015,
+    "total_flos": 1.461291118888168e+18,
+    "train_loss": 1.202193304075487,
+    "train_runtime": 14493.7066,
+    "train_samples_per_second": 255.536,
+    "train_steps_per_second": 2.129
+}
trainer_state.json
ADDED
@@ -0,0 +1,735 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 9.998542037907015,
+  "eval_steps": 500,
+  "global_step": 30860,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.10011339705167666,
+      "grad_norm": 0.018291741609573364,
+      "learning_rate": 9.90051847051199e-05,
+      "loss": 2.1839,
+      "step": 309
+    },
+    {
+      "epoch": 0.2002267941033533,
+      "grad_norm": 0.02208893373608589,
+      "learning_rate": 9.800388852883993e-05,
+      "loss": 2.1361,
+      "step": 618
+    },
+    {
+      "epoch": 0.30034019115502997,
+      "grad_norm": 0.012775925919413567,
+      "learning_rate": 9.700259235255995e-05,
+      "loss": 2.1625,
+      "step": 927
+    },
+    {
+      "epoch": 0.4004535882067066,
+      "grad_norm": 0.016916805878281593,
+      "learning_rate": 9.600129617627997e-05,
+      "loss": 2.1534,
+      "step": 1236
+    },
+    {
+      "epoch": 0.5005669852583833,
+      "grad_norm": 0.006638580933213234,
+      "learning_rate": 9.5e-05,
+      "loss": 2.1464,
+      "step": 1545
+    },
+    {
+      "epoch": 0.6006803823100599,
+      "grad_norm": 0.013361390680074692,
+      "learning_rate": 9.399870382372004e-05,
+      "loss": 2.1685,
+      "step": 1854
+    },
+    {
+      "epoch": 0.7007937793617366,
+      "grad_norm": 0.005704471841454506,
+      "learning_rate": 9.300064808814e-05,
+      "loss": 2.1539,
+      "step": 2163
+    },
+    {
+      "epoch": 0.8009071764134132,
+      "grad_norm": 0.02282548137009144,
+      "learning_rate": 9.199935191186002e-05,
+      "loss": 2.1799,
+      "step": 2472
+    },
+    {
+      "epoch": 0.9010205734650899,
+      "grad_norm": 0.011914879083633423,
+      "learning_rate": 9.099805573558005e-05,
+      "loss": 2.1487,
+      "step": 2781
+    },
+    {
+      "epoch": 1.000971974728657,
+      "grad_norm": 0.020065952092409134,
+      "learning_rate": 8.999675955930007e-05,
+      "loss": 2.1416,
+      "step": 3090
+    },
+    {
+      "epoch": 1.1010853717803337,
+      "grad_norm": 0.020726900547742844,
+      "learning_rate": 8.899546338302009e-05,
+      "loss": 2.1422,
+      "step": 3399
+    },
+    {
+      "epoch": 1.2011987688320103,
+      "grad_norm": 0.02327028475701809,
+      "learning_rate": 8.799416720674012e-05,
+      "loss": 2.149,
+      "step": 3708
+    },
+    {
+      "epoch": 1.301312165883687,
+      "grad_norm": 0.03631984442472458,
+      "learning_rate": 8.699611147116008e-05,
+      "loss": 2.1605,
+      "step": 4017
+    },
+    {
+      "epoch": 1.4014255629353638,
+      "grad_norm": 0.03152529150247574,
+      "learning_rate": 8.59948152948801e-05,
+      "loss": 2.1436,
+      "step": 4326
+    },
+    {
+      "epoch": 1.5015389599870403,
+      "grad_norm": 0.05229083448648453,
+      "learning_rate": 8.499351911860013e-05,
+      "loss": 2.1794,
+      "step": 4635
+    },
+    {
+      "epoch": 1.601652357038717,
+      "grad_norm": 0.027535825967788696,
+      "learning_rate": 8.399222294232017e-05,
+      "loss": 2.1641,
+      "step": 4944
+    },
+    {
+      "epoch": 1.7017657540903937,
+      "grad_norm": 0.0046606422401964664,
+      "learning_rate": 8.299092676604019e-05,
+      "loss": 2.1242,
+      "step": 5253
+    },
+    {
+      "epoch": 1.8018791511420704,
+      "grad_norm": 0.040044769644737244,
+      "learning_rate": 8.198963058976021e-05,
+      "loss": 2.14,
+      "step": 5562
+    },
+    {
+      "epoch": 1.901992548193747,
+      "grad_norm": 0.01644105464220047,
+      "learning_rate": 8.098833441348024e-05,
+      "loss": 2.1488,
+      "step": 5871
+    },
+    {
+      "epoch": 2.001943949457314,
+      "grad_norm": 0.01979956403374672,
+      "learning_rate": 7.99902786779002e-05,
+      "loss": 2.1586,
+      "step": 6180
+    },
+    {
+      "epoch": 2.1020573465089907,
+      "grad_norm": 0.027056917548179626,
+      "learning_rate": 7.898898250162022e-05,
+      "loss": 2.1503,
+      "step": 6489
+    },
+    {
+      "epoch": 2.2021707435606674,
+      "grad_norm": 0.01640058308839798,
+      "learning_rate": 7.798768632534025e-05,
+      "loss": 2.1418,
+      "step": 6798
+    },
+    {
+      "epoch": 2.302284140612344,
+      "grad_norm": 0.01607314869761467,
+      "learning_rate": 7.698639014906027e-05,
+      "loss": 2.1514,
+      "step": 7107
+    },
+    {
+      "epoch": 2.4023975376640205,
+      "grad_norm": 0.017274878919124603,
+      "learning_rate": 7.59850939727803e-05,
+      "loss": 2.1465,
+      "step": 7416
+    },
+    {
+      "epoch": 2.5025109347156973,
+      "grad_norm": 0.024440627545118332,
+      "learning_rate": 7.498379779650033e-05,
+      "loss": 2.1211,
+      "step": 7725
+    },
+    {
+      "epoch": 2.602624331767374,
+      "grad_norm": 0.005127054639160633,
+      "learning_rate": 7.39857420609203e-05,
+      "loss": 2.1423,
+      "step": 8034
+    },
+    {
+      "epoch": 2.702737728819051,
+      "grad_norm": 0.05081469565629959,
+      "learning_rate": 7.298444588464032e-05,
+      "loss": 2.1705,
+      "step": 8343
+    },
+    {
+      "epoch": 2.8028511258707276,
+      "grad_norm": 0.01722005568444729,
+      "learning_rate": 7.198314970836034e-05,
+      "loss": 2.1462,
+      "step": 8652
+    },
+    {
+      "epoch": 2.902964522922404,
+      "grad_norm": 0.03728850930929184,
+      "learning_rate": 7.098185353208037e-05,
+      "loss": 2.1306,
+      "step": 8961
+    },
+    {
+      "epoch": 3.002915924185971,
+      "grad_norm": 0.016364697366952896,
+      "learning_rate": 6.998055735580039e-05,
+      "loss": 2.1639,
+      "step": 9270
+    },
+    {
+      "epoch": 3.103029321237648,
+      "grad_norm": 0.005412334576249123,
+      "learning_rate": 6.897926117952041e-05,
+      "loss": 2.1624,
+      "step": 9579
+    },
+    {
+      "epoch": 3.2031427182893246,
+      "grad_norm": 0.0071863215416669846,
+      "learning_rate": 6.797796500324045e-05,
+      "loss": 2.1682,
+      "step": 9888
+    },
+    {
+      "epoch": 3.303256115341001,
+      "grad_norm": 0.01707269623875618,
+      "learning_rate": 6.69799092676604e-05,
+      "loss": 2.129,
+      "step": 10197
+    },
+    {
+      "epoch": 3.4033695123926777,
+      "grad_norm": 0.0162820965051651,
+      "learning_rate": 6.597861309138042e-05,
+      "loss": 2.1552,
+      "step": 10506
+    },
+    {
+      "epoch": 3.5034829094443545,
+      "grad_norm": 0.005164678208529949,
+      "learning_rate": 6.497731691510046e-05,
+      "loss": 2.137,
+      "step": 10815
+    },
+    {
+      "epoch": 3.6035963064960312,
+      "grad_norm": 0.01631810888648033,
+      "learning_rate": 6.397602073882049e-05,
+      "loss": 2.1578,
+      "step": 11124
+    },
+    {
+      "epoch": 3.703709703547708,
+      "grad_norm": 0.004154821392148733,
+      "learning_rate": 6.297472456254051e-05,
+      "loss": 2.1602,
+      "step": 11433
+    },
+    {
+      "epoch": 3.8038231005993843,
+      "grad_norm": 0.004850070457905531,
+      "learning_rate": 6.197342838626053e-05,
+      "loss": 2.1412,
+      "step": 11742
+    },
+    {
+      "epoch": 3.903936497651061,
+      "grad_norm": 0.0028279961552470922,
+      "learning_rate": 6.0975372650680494e-05,
+      "loss": 2.1338,
+      "step": 12051
+    },
+    {
+      "epoch": 4.003887898914628,
+      "grad_norm": 0.016007574275135994,
+      "learning_rate": 5.997407647440052e-05,
+      "loss": 2.1753,
+      "step": 12360
+    },
+    {
+      "epoch": 4.104001295966305,
+      "grad_norm": 0.013178674504160881,
+      "learning_rate": 5.897278029812054e-05,
+      "loss": 2.1533,
+      "step": 12669
+    },
+    {
+      "epoch": 4.204114693017981,
+      "grad_norm": 0.010946434922516346,
+      "learning_rate": 5.797148412184057e-05,
+      "loss": 2.1506,
+      "step": 12978
+    },
+    {
+      "epoch": 4.304228090069659,
+      "grad_norm": 0.06032814085483551,
+      "learning_rate": 5.6970187945560595e-05,
+      "loss": 2.1408,
+      "step": 13287
+    },
+    {
+      "epoch": 4.404341487121335,
+      "grad_norm": 0.036340948194265366,
+      "learning_rate": 5.596889176928063e-05,
+      "loss": 2.1317,
+      "step": 13596
+    },
+    {
+      "epoch": 4.505750850477888,
+      "grad_norm": 0.022454094141721725,
+      "learning_rate": 5.4967595593000656e-05,
+      "loss": 2.1524,
+      "step": 13905
+    },
+    {
+      "epoch": 4.605864247529564,
+      "grad_norm": 0.01212249230593443,
+      "learning_rate": 5.396953985742061e-05,
+      "loss": 2.1681,
+      "step": 14214
+    },
+    {
+      "epoch": 4.705977644581241,
+      "grad_norm": 0.021747123450040817,
+      "learning_rate": 5.296824368114064e-05,
+      "loss": 2.1562,
+      "step": 14523
+    },
+    {
+      "epoch": 4.806091041632918,
+      "grad_norm": 0.005955239292234182,
+      "learning_rate": 5.196694750486067e-05,
+      "loss": 2.1828,
+      "step": 14832
+    },
+    {
+      "epoch": 4.906204438684594,
+      "grad_norm": 0.012121310457587242,
+      "learning_rate": 5.096565132858069e-05,
+      "loss": 2.1437,
+      "step": 15141
+    },
+    {
+      "epoch": 5.006479831524381,
+      "grad_norm": 0.015455316752195358,
+      "learning_rate": 4.996435515230072e-05,
+      "loss": 2.1528,
+      "step": 15450
+    },
+    {
+      "epoch": 5.106593228576057,
+      "grad_norm": 0.0007723022717982531,
+      "learning_rate": 4.896305897602074e-05,
+      "loss": 2.1723,
+      "step": 15759
+    },
+    {
+      "epoch": 5.206706625627733,
+      "grad_norm": 0.004882230423390865,
+      "learning_rate": 4.7965003240440704e-05,
+      "loss": 2.1525,
+      "step": 16068
+    },
+    {
+      "epoch": 5.30682002267941,
+      "grad_norm": 0.012511253356933594,
+      "learning_rate": 4.696370706416073e-05,
+      "loss": 2.1458,
+      "step": 16377
+    },
+    {
+      "epoch": 5.406933419731087,
+      "grad_norm": 0.0262750256806612,
+      "learning_rate": 4.596241088788075e-05,
+      "loss": 2.0936,
+      "step": 16686
+    },
+    {
+      "epoch": 5.507046816782764,
+      "grad_norm": 0.007834335789084435,
+      "learning_rate": 4.4961114711600785e-05,
+      "loss": 2.1322,
+      "step": 16995
+    },
+    {
+      "epoch": 5.60716021383444,
+      "grad_norm": 0.020112166181206703,
+      "learning_rate": 4.3959818535320806e-05,
+      "loss": 2.1223,
+      "step": 17304
+    },
+    {
+      "epoch": 5.707273610886117,
+      "grad_norm": 0.0009047465864568949,
+      "learning_rate": 4.295852235904083e-05,
+      "loss": 2.1425,
+      "step": 17613
+    },
+    {
+      "epoch": 5.807387007937794,
+      "grad_norm": 0.006675088778138161,
+      "learning_rate": 4.195722618276085e-05,
+      "loss": 2.1433,
+      "step": 17922
+    },
+    {
+      "epoch": 5.90750040498947,
+      "grad_norm": 0.022665197029709816,
+      "learning_rate": 4.0959170447180816e-05,
+      "loss": 2.1942,
+      "step": 18231
+    },
+    {
+      "epoch": 6.007451806253037,
+      "grad_norm": 0.039628468453884125,
+      "learning_rate": 3.995787427090085e-05,
+      "loss": 2.1444,
+      "step": 18540
+    },
+    {
+      "epoch": 6.107565203304714,
+      "grad_norm": 0.0011520631378516555,
+      "learning_rate": 3.895657809462087e-05,
+      "loss": 2.1524,
+      "step": 18849
+    },
+    {
+      "epoch": 6.207678600356391,
+      "grad_norm": 0.03555034101009369,
+      "learning_rate": 3.79552819183409e-05,
+      "loss": 2.1203,
+      "step": 19158
+    },
+    {
+      "epoch": 6.307791997408067,
+      "grad_norm": 0.013503223657608032,
+      "learning_rate": 3.695398574206092e-05,
+      "loss": 2.1352,
+      "step": 19467
+    },
+    {
+      "epoch": 6.407905394459744,
+      "grad_norm": 0.026100030168890953,
+      "learning_rate": 3.595268956578095e-05,
+      "loss": 2.1655,
+      "step": 19776
+    },
+    {
+      "epoch": 6.508018791511421,
+      "grad_norm": 0.0006254952168092132,
+      "learning_rate": 3.4954633830200915e-05,
+      "loss": 2.1352,
+      "step": 20085
+    },
+    {
+      "epoch": 6.608132188563097,
+      "grad_norm": 0.01487251278012991,
+      "learning_rate": 3.3953337653920935e-05,
+      "loss": 2.1799,
+      "step": 20394
+    },
+    {
+      "epoch": 6.708245585614774,
+      "grad_norm": 0.0053134192712605,
+      "learning_rate": 3.295204147764096e-05,
+      "loss": 2.1506,
+      "step": 20703
+    },
+    {
+      "epoch": 6.808358982666451,
+      "grad_norm": 0.006517978850752115,
+      "learning_rate": 3.195074530136098e-05,
+      "loss": 2.1567,
+      "step": 21012
+    },
+    {
+      "epoch": 6.908472379718127,
+      "grad_norm": 0.014856001362204552,
+      "learning_rate": 3.0949449125081016e-05,
+      "loss": 2.1593,
+      "step": 21321
+    },
+    {
+      "epoch": 7.008423780981694,
+      "grad_norm": 0.0019718091934919357,
+      "learning_rate": 2.994815294880104e-05,
+      "loss": 2.1634,
+      "step": 21630
+    },
+    {
+      "epoch": 7.108537178033371,
+      "grad_norm": 0.017417173832654953,
+      "learning_rate": 2.8946856772521063e-05,
+      "loss": 2.1433,
+      "step": 21939
+    },
+    {
+      "epoch": 7.208650575085048,
+      "grad_norm": 0.034113720059394836,
+      "learning_rate": 2.7948801036941023e-05,
+      "loss": 2.1489,
+      "step": 22248
+    },
+    {
+      "epoch": 7.308763972136725,
+      "grad_norm": 0.018758224323391914,
+      "learning_rate": 2.694750486066105e-05,
+      "loss": 2.13,
+      "step": 22557
+    },
+    {
+      "epoch": 7.408877369188401,
+      "grad_norm": 0.04419185221195221,
+      "learning_rate": 2.594620868438108e-05,
+      "loss": 2.1641,
+      "step": 22866
+    },
+    {
+      "epoch": 7.508990766240077,
+      "grad_norm": 0.001091700978577137,
+      "learning_rate": 2.4944912508101104e-05,
+      "loss": 2.1763,
+      "step": 23175
+    },
+    {
+      "epoch": 7.609104163291755,
+      "grad_norm": 0.0011129506165161729,
+      "learning_rate": 2.3943616331821128e-05,
+      "loss": 2.125,
+      "step": 23484
+    },
+    {
+      "epoch": 7.709217560343431,
+      "grad_norm": 0.007060033269226551,
+      "learning_rate": 2.2942320155541155e-05,
+      "loss": 2.1357,
+      "step": 23793
+    },
+    {
+      "epoch": 7.809330957395108,
+      "grad_norm": 0.004454698413610458,
+      "learning_rate": 2.1944264419961118e-05,
+      "loss": 2.1502,
+      "step": 24102
+    },
+    {
+      "epoch": 7.9094443544467845,
+      "grad_norm": 0.014023036696016788,
+      "learning_rate": 2.0942968243681142e-05,
+      "loss": 2.1397,
+      "step": 24411
+    },
+    {
+      "epoch": 8.00939575571035,
+      "grad_norm": 0.0020535311195999384,
+      "learning_rate": 1.994167206740117e-05,
+      "loss": 2.1594,
+      "step": 24720
+    },
+    {
+      "epoch": 8.109509152762028,
+      "grad_norm": 0.030191343277692795,
+      "learning_rate": 1.8940375891121192e-05,
+      "loss": 2.1379,
+      "step": 25029
+    },
+    {
+      "epoch": 8.209622549813705,
+      "grad_norm": 0.02658534049987793,
+      "learning_rate": 1.793907971484122e-05,
+      "loss": 2.163,
+      "step": 25338
+    },
+    {
+      "epoch": 8.30973594686538,
+      "grad_norm": 0.006291504483669996,
+      "learning_rate": 1.6937783538561243e-05,
+      "loss": 2.1445,
+      "step": 25647
+    },
+    {
+      "epoch": 8.409849343917058,
+      "grad_norm": 0.013936794362962246,
+      "learning_rate": 1.593648736228127e-05,
+      "loss": 2.1571,
+      "step": 25956
+    },
+    {
+      "epoch": 8.509962740968735,
+      "grad_norm": 0.031892433762550354,
+      "learning_rate": 1.4938431626701233e-05,
+      "loss": 2.1562,
+      "step": 26265
+    },
+    {
+      "epoch": 8.610076138020412,
+      "grad_norm": 0.0028675836510956287,
+      "learning_rate": 1.3937135450421257e-05,
+      "loss": 2.1373,
+      "step": 26574
+    },
+    {
+      "epoch": 8.710189535072088,
+      "grad_norm": 0.027382852509617805,
+      "learning_rate": 1.2935839274141284e-05,
+      "loss": 2.1162,
+      "step": 26883
+    },
+    {
+      "epoch": 8.810302932123765,
+      "grad_norm": 0.006504488177597523,
+      "learning_rate": 1.193454309786131e-05,
+      "loss": 2.1281,
+      "step": 27192
+    },
+    {
+      "epoch": 8.910416329175442,
+      "grad_norm": 0.006998216733336449,
+      "learning_rate": 1.0933246921581337e-05,
+      "loss": 2.1681,
+      "step": 27501
+    },
+    {
+      "epoch": 9.01036773043901,
+      "grad_norm": 0.0019873257260769606,
+      "learning_rate": 9.931950745301362e-06,
+      "loss": 2.1397,
+      "step": 27810
+    },
+    {
+      "epoch": 9.110481127490685,
+      "grad_norm": 0.04183882847428322,
+      "learning_rate": 8.933895009721323e-06,
+      "loss": 2.1425,
+      "step": 28119
+    },
+    {
+      "epoch": 9.210594524542362,
+      "grad_norm": 0.008224571123719215,
+      "learning_rate": 7.932598833441349e-06,
+      "loss": 2.1567,
+      "step": 28428
+    },
+    {
+      "epoch": 9.31070792159404,
+      "grad_norm": 0.009601627476513386,
+      "learning_rate": 6.931302657161375e-06,
+      "loss": 2.1334,
+      "step": 28737
+    },
+    {
+      "epoch": 9.410821318645715,
+      "grad_norm": 0.028333676978945732,
+      "learning_rate": 5.9300064808814e-06,
+      "loss": 2.1732,
+      "step": 29046
+    },
+    {
+      "epoch": 9.510934715697392,
+      "grad_norm": 0.01850961521267891,
+      "learning_rate": 4.9287103046014265e-06,
+      "loss": 2.1401,
+      "step": 29355
+    },
+    {
+      "epoch": 9.611048112749069,
+      "grad_norm": 0.0022975043393671513,
+      "learning_rate": 3.927414128321452e-06,
+      "loss": 2.1523,
+      "step": 29664
+    },
+    {
+      "epoch": 9.711161509800744,
+      "grad_norm": 0.033216096460819244,
+      "learning_rate": 2.9261179520414777e-06,
+      "loss": 2.1497,
+      "step": 29973
+    },
+    {
+      "epoch": 9.811274906852422,
+      "grad_norm": 0.024143142625689507,
+      "learning_rate": 1.928062216461439e-06,
+      "loss": 2.1504,
+      "step": 30282
+    },
+    {
+      "epoch": 9.911388303904099,
+      "grad_norm": 0.0085253044962883,
+      "learning_rate": 9.267660401814646e-07,
+      "loss": 2.177,
+      "step": 30591
+    },
+    {
+      "epoch": 9.998542037907015,
+      "step": 30860,
+      "total_flos": 1.461291118888168e+18,
+      "train_loss": 1.202193304075487,
+      "train_runtime": 14493.7066,
+      "train_samples_per_second": 255.536,
+      "train_steps_per_second": 2.129
+    }
+  ],
+  "logging_steps": 309,
+  "max_steps": 30860,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 10,
+  "save_steps": 6800,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 1.461291118888168e+18,
+  "train_batch_size": 30,
+  "trial_name": null,
+  "trial_params": null
+}
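The `log_history` above records loss and learning rate every 309 steps, which makes it easy to plot the training curve; a minimal sketch with `matplotlib`, assuming `trainer_state.json` is available locally:

```python
import json
import matplotlib.pyplot as plt

with open("trainer_state.json") as f:
    state = json.load(f)

# Keep the periodic logging entries; the last element is a run summary
# that has "train_loss" instead of "loss".
logs = [e for e in state["log_history"] if "loss" in e]
steps = [e["step"] for e in logs]
losses = [e["loss"] for e in logs]

plt.plot(steps, losses)
plt.xlabel("step")
plt.ylabel("training loss")
plt.title("Loss logged every 309 steps")
plt.savefig("loss_curve.png")
```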
vocab.txt
ADDED
The diff for this file is too large to render.
See raw diff