Training in progress, step 100, checkpoint

Browse files

Files changed (8) hide show

last-checkpoint/config.json +30 -0
last-checkpoint/generation_config.json +8 -0
last-checkpoint/model.safetensors +3 -0
last-checkpoint/optimizer.pt +3 -0
last-checkpoint/rng_state.pth +3 -0
last-checkpoint/scheduler.pt +3 -0
last-checkpoint/trainer_state.json +742 -0
last-checkpoint/training_args.bin +3 -0

last-checkpoint/config.json ADDED Viewed

	@@ -0,0 +1,30 @@

+{
+  "architectures": [
+    "LlamaForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "bos_token_id": 31989,
+  "eos_token_id": 31989,
+  "head_dim": 64,
+  "hidden_act": "silu",
+  "hidden_size": 768,
+  "initializer_range": 0.036084391824351615,
+  "intermediate_size": 1920,
+  "max_position_embeddings": 8192,
+  "mlp_bias": false,
+  "model_type": "llama",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 27,
+  "num_key_value_heads": 4,
+  "pad_token_id": 31989,
+  "pretraining_tp": 1,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000.0,
+  "tie_word_embeddings": true,
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.54.1",
+  "use_cache": false,
+  "vocab_size": 32000
+}

last-checkpoint/generation_config.json ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+  "_from_model_config": true,
+  "bos_token_id": 31989,
+  "eos_token_id": 31989,
+  "pad_token_id": 31989,
+  "transformers_version": "4.54.1",
+  "use_cache": false
+}

last-checkpoint/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4bd4d7deb85b2a3543b5afa31bbc84f94cbb8c34a94e35e2215c354aebb0376d
+size 373077376

last-checkpoint/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:16f6b0ee44f3b73f289f7863976838a4d4935e338ff11708d6a81805fb7aaa52
+size 373225035

last-checkpoint/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a794a6cb9cd4bbd0c53d08db0e20a5536c789bba6f22113385c1c408d58908bd
+size 14645

last-checkpoint/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:fabf68d4ba15fc505c3ddaa1aef5ae6a1cb7856a1ce87040e1d0d9375e28c9bf
+size 1401

last-checkpoint/trainer_state.json ADDED Viewed

	@@ -0,0 +1,742 @@

+{
+  "best_global_step": null,
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.2079002079002079,
+  "eval_steps": 100,
+  "global_step": 100,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.002079002079002079,
+      "grad_norm": 1.4921875,
+      "learning_rate": 0.0,
+      "loss": 10.8788,
+      "step": 1
+    },
+    {
+      "epoch": 0.004158004158004158,
+      "grad_norm": 4.5,
+      "learning_rate": 4.149377593360996e-06,
+      "loss": 10.8665,
+      "step": 2
+    },
+    {
+      "epoch": 0.006237006237006237,
+      "grad_norm": 1.40625,
+      "learning_rate": 8.298755186721992e-06,
+      "loss": 10.8779,
+      "step": 3
+    },
+    {
+      "epoch": 0.008316008316008316,
+      "grad_norm": 1.375,
+      "learning_rate": 1.2448132780082987e-05,
+      "loss": 10.8758,
+      "step": 4
+    },
+    {
+      "epoch": 0.010395010395010396,
+      "grad_norm": 1.6328125,
+      "learning_rate": 1.6597510373443984e-05,
+      "loss": 10.8705,
+      "step": 5
+    },
+    {
+      "epoch": 0.012474012474012475,
+      "grad_norm": 1.3984375,
+      "learning_rate": 2.074688796680498e-05,
+      "loss": 10.8667,
+      "step": 6
+    },
+    {
+      "epoch": 0.014553014553014554,
+      "grad_norm": 1.4296875,
+      "learning_rate": 2.4896265560165973e-05,
+      "loss": 10.8705,
+      "step": 7
+    },
+    {
+      "epoch": 0.016632016632016633,
+      "grad_norm": 1.3515625,
+      "learning_rate": 2.904564315352697e-05,
+      "loss": 10.875,
+      "step": 8
+    },
+    {
+      "epoch": 0.018711018711018712,
+      "grad_norm": 1.609375,
+      "learning_rate": 3.319502074688797e-05,
+      "loss": 10.8695,
+      "step": 9
+    },
+    {
+      "epoch": 0.02079002079002079,
+      "grad_norm": 1.3125,
+      "learning_rate": 3.734439834024896e-05,
+      "loss": 10.8753,
+      "step": 10
+    },
+    {
+      "epoch": 0.02286902286902287,
+      "grad_norm": 1.40625,
+      "learning_rate": 4.149377593360996e-05,
+      "loss": 10.8749,
+      "step": 11
+    },
+    {
+      "epoch": 0.02494802494802495,
+      "grad_norm": 1.375,
+      "learning_rate": 4.564315352697095e-05,
+      "loss": 10.8792,
+      "step": 12
+    },
+    {
+      "epoch": 0.02702702702702703,
+      "grad_norm": 1.328125,
+      "learning_rate": 4.9792531120331946e-05,
+      "loss": 10.869,
+      "step": 13
+    },
+    {
+      "epoch": 0.029106029106029108,
+      "grad_norm": 1.3984375,
+      "learning_rate": 5.394190871369295e-05,
+      "loss": 10.8685,
+      "step": 14
+    },
+    {
+      "epoch": 0.031185031185031187,
+      "grad_norm": 1.46875,
+      "learning_rate": 5.809128630705394e-05,
+      "loss": 10.8719,
+      "step": 15
+    },
+    {
+      "epoch": 0.033264033264033266,
+      "grad_norm": 1.3828125,
+      "learning_rate": 6.224066390041494e-05,
+      "loss": 10.8719,
+      "step": 16
+    },
+    {
+      "epoch": 0.035343035343035345,
+      "grad_norm": 1.3828125,
+      "learning_rate": 6.639004149377594e-05,
+      "loss": 10.8723,
+      "step": 17
+    },
+    {
+      "epoch": 0.037422037422037424,
+      "grad_norm": 1.390625,
+      "learning_rate": 7.053941908713692e-05,
+      "loss": 10.8653,
+      "step": 18
+    },
+    {
+      "epoch": 0.0395010395010395,
+      "grad_norm": 1.3671875,
+      "learning_rate": 7.468879668049793e-05,
+      "loss": 10.863,
+      "step": 19
+    },
+    {
+      "epoch": 0.04158004158004158,
+      "grad_norm": 1.53125,
+      "learning_rate": 7.883817427385891e-05,
+      "loss": 10.8578,
+      "step": 20
+    },
+    {
+      "epoch": 0.04365904365904366,
+      "grad_norm": 1.40625,
+      "learning_rate": 8.298755186721991e-05,
+      "loss": 10.8624,
+      "step": 21
+    },
+    {
+      "epoch": 0.04573804573804574,
+      "grad_norm": 1.3515625,
+      "learning_rate": 8.713692946058092e-05,
+      "loss": 10.8686,
+      "step": 22
+    },
+    {
+      "epoch": 0.04781704781704782,
+      "grad_norm": 1.53125,
+      "learning_rate": 9.12863070539419e-05,
+      "loss": 10.8658,
+      "step": 23
+    },
+    {
+      "epoch": 0.0498960498960499,
+      "grad_norm": 4.40625,
+      "learning_rate": 9.54356846473029e-05,
+      "loss": 10.8098,
+      "step": 24
+    },
+    {
+      "epoch": 0.05197505197505198,
+      "grad_norm": 1.46875,
+      "learning_rate": 9.958506224066389e-05,
+      "loss": 10.8677,
+      "step": 25
+    },
+    {
+      "epoch": 0.05405405405405406,
+      "grad_norm": 1.4765625,
+      "learning_rate": 0.00010373443983402491,
+      "loss": 10.8655,
+      "step": 26
+    },
+    {
+      "epoch": 0.056133056133056136,
+      "grad_norm": 4.375,
+      "learning_rate": 0.0001078838174273859,
+      "loss": 10.8047,
+      "step": 27
+    },
+    {
+      "epoch": 0.058212058212058215,
+      "grad_norm": 1.4609375,
+      "learning_rate": 0.0001120331950207469,
+      "loss": 10.8632,
+      "step": 28
+    },
+    {
+      "epoch": 0.060291060291060294,
+      "grad_norm": 1.5,
+      "learning_rate": 0.00011618257261410788,
+      "loss": 10.8634,
+      "step": 29
+    },
+    {
+      "epoch": 0.062370062370062374,
+      "grad_norm": 1.4296875,
+      "learning_rate": 0.00012033195020746888,
+      "loss": 10.8661,
+      "step": 30
+    },
+    {
+      "epoch": 0.06444906444906445,
+      "grad_norm": 1.296875,
+      "learning_rate": 0.00012448132780082987,
+      "loss": 10.864,
+      "step": 31
+    },
+    {
+      "epoch": 0.06652806652806653,
+      "grad_norm": 1.453125,
+      "learning_rate": 0.00012863070539419086,
+      "loss": 10.8622,
+      "step": 32
+    },
+    {
+      "epoch": 0.06860706860706861,
+      "grad_norm": 1.34375,
+      "learning_rate": 0.00013278008298755187,
+      "loss": 10.8646,
+      "step": 33
+    },
+    {
+      "epoch": 0.07068607068607069,
+      "grad_norm": 1.4921875,
+      "learning_rate": 0.0001369294605809129,
+      "loss": 10.8593,
+      "step": 34
+    },
+    {
+      "epoch": 0.07276507276507277,
+      "grad_norm": 1.4296875,
+      "learning_rate": 0.00014107883817427385,
+      "loss": 10.8608,
+      "step": 35
+    },
+    {
+      "epoch": 0.07484407484407485,
+      "grad_norm": 1.2109375,
+      "learning_rate": 0.00014522821576763486,
+      "loss": 10.8601,
+      "step": 36
+    },
+    {
+      "epoch": 0.07692307692307693,
+      "grad_norm": 1.3046875,
+      "learning_rate": 0.00014937759336099585,
+      "loss": 10.8584,
+      "step": 37
+    },
+    {
+      "epoch": 0.079002079002079,
+      "grad_norm": 1.4609375,
+      "learning_rate": 0.00015352697095435687,
+      "loss": 10.8649,
+      "step": 38
+    },
+    {
+      "epoch": 0.08108108108108109,
+      "grad_norm": 1.3671875,
+      "learning_rate": 0.00015767634854771783,
+      "loss": 10.8509,
+      "step": 39
+    },
+    {
+      "epoch": 0.08316008316008316,
+      "grad_norm": 1.53125,
+      "learning_rate": 0.00016182572614107884,
+      "loss": 10.8655,
+      "step": 40
+    },
+    {
+      "epoch": 0.08523908523908524,
+      "grad_norm": 1.359375,
+      "learning_rate": 0.00016597510373443983,
+      "loss": 10.8572,
+      "step": 41
+    },
+    {
+      "epoch": 0.08731808731808732,
+      "grad_norm": 1.421875,
+      "learning_rate": 0.00017012448132780084,
+      "loss": 10.8498,
+      "step": 42
+    },
+    {
+      "epoch": 0.0893970893970894,
+      "grad_norm": 3.78125,
+      "learning_rate": 0.00017427385892116183,
+      "loss": 10.7,
+      "step": 43
+    },
+    {
+      "epoch": 0.09147609147609148,
+      "grad_norm": 1.3125,
+      "learning_rate": 0.00017842323651452282,
+      "loss": 10.8577,
+      "step": 44
+    },
+    {
+      "epoch": 0.09355509355509356,
+      "grad_norm": 1.390625,
+      "learning_rate": 0.0001825726141078838,
+      "loss": 10.8574,
+      "step": 45
+    },
+    {
+      "epoch": 0.09563409563409564,
+      "grad_norm": 1.46875,
+      "learning_rate": 0.00018672199170124482,
+      "loss": 10.8341,
+      "step": 46
+    },
+    {
+      "epoch": 0.09771309771309772,
+      "grad_norm": 1.390625,
+      "learning_rate": 0.0001908713692946058,
+      "loss": 10.8505,
+      "step": 47
+    },
+    {
+      "epoch": 0.0997920997920998,
+      "grad_norm": 1.3671875,
+      "learning_rate": 0.0001950207468879668,
+      "loss": 10.8296,
+      "step": 48
+    },
+    {
+      "epoch": 0.10187110187110188,
+      "grad_norm": 1.3984375,
+      "learning_rate": 0.00019917012448132778,
+      "loss": 10.8462,
+      "step": 49
+    },
+    {
+      "epoch": 0.10395010395010396,
+      "grad_norm": 1.375,
+      "learning_rate": 0.0002033195020746888,
+      "loss": 10.8442,
+      "step": 50
+    },
+    {
+      "epoch": 0.10602910602910603,
+      "grad_norm": 1.3203125,
+      "learning_rate": 0.00020746887966804981,
+      "loss": 10.8353,
+      "step": 51
+    },
+    {
+      "epoch": 0.10810810810810811,
+      "grad_norm": 1.4609375,
+      "learning_rate": 0.0002116182572614108,
+      "loss": 10.8277,
+      "step": 52
+    },
+    {
+      "epoch": 0.1101871101871102,
+      "grad_norm": 1.3125,
+      "learning_rate": 0.0002157676348547718,
+      "loss": 10.8403,
+      "step": 53
+    },
+    {
+      "epoch": 0.11226611226611227,
+      "grad_norm": 1.3359375,
+      "learning_rate": 0.00021991701244813278,
+      "loss": 10.829,
+      "step": 54
+    },
+    {
+      "epoch": 0.11434511434511435,
+      "grad_norm": 1.421875,
+      "learning_rate": 0.0002240663900414938,
+      "loss": 10.8103,
+      "step": 55
+    },
+    {
+      "epoch": 0.11642411642411643,
+      "grad_norm": 1.3359375,
+      "learning_rate": 0.00022821576763485478,
+      "loss": 10.8384,
+      "step": 56
+    },
+    {
+      "epoch": 0.11850311850311851,
+      "grad_norm": 1.3671875,
+      "learning_rate": 0.00023236514522821577,
+      "loss": 10.8279,
+      "step": 57
+    },
+    {
+      "epoch": 0.12058212058212059,
+      "grad_norm": 1.625,
+      "learning_rate": 0.00023651452282157675,
+      "loss": 10.8164,
+      "step": 58
+    },
+    {
+      "epoch": 0.12266112266112267,
+      "grad_norm": 1.3671875,
+      "learning_rate": 0.00024066390041493777,
+      "loss": 10.8323,
+      "step": 59
+    },
+    {
+      "epoch": 0.12474012474012475,
+      "grad_norm": 4.21875,
+      "learning_rate": 0.00024481327800829876,
+      "loss": 10.6432,
+      "step": 60
+    },
+    {
+      "epoch": 0.12681912681912683,
+      "grad_norm": 1.4375,
+      "learning_rate": 0.00024896265560165974,
+      "loss": 10.8131,
+      "step": 61
+    },
+    {
+      "epoch": 0.1288981288981289,
+      "grad_norm": 1.4453125,
+      "learning_rate": 0.00025311203319502073,
+      "loss": 10.8202,
+      "step": 62
+    },
+    {
+      "epoch": 0.13097713097713098,
+      "grad_norm": 2.53125,
+      "learning_rate": 0.0002572614107883817,
+      "loss": 10.5981,
+      "step": 63
+    },
+    {
+      "epoch": 0.13305613305613306,
+      "grad_norm": 1.3359375,
+      "learning_rate": 0.00026141078838174276,
+      "loss": 10.8243,
+      "step": 64
+    },
+    {
+      "epoch": 0.13513513513513514,
+      "grad_norm": 1.3125,
+      "learning_rate": 0.00026556016597510375,
+      "loss": 10.8219,
+      "step": 65
+    },
+    {
+      "epoch": 0.13721413721413722,
+      "grad_norm": 1.4140625,
+      "learning_rate": 0.00026970954356846474,
+      "loss": 10.7936,
+      "step": 66
+    },
+    {
+      "epoch": 0.1392931392931393,
+      "grad_norm": 1.3203125,
+      "learning_rate": 0.0002738589211618258,
+      "loss": 10.8229,
+      "step": 67
+    },
+    {
+      "epoch": 0.14137214137214138,
+      "grad_norm": 1.46875,
+      "learning_rate": 0.0002780082987551867,
+      "loss": 10.7755,
+      "step": 68
+    },
+    {
+      "epoch": 0.14345114345114346,
+      "grad_norm": 1.390625,
+      "learning_rate": 0.0002821576763485477,
+      "loss": 10.8028,
+      "step": 69
+    },
+    {
+      "epoch": 0.14553014553014554,
+      "grad_norm": 1.5078125,
+      "learning_rate": 0.0002863070539419087,
+      "loss": 10.7772,
+      "step": 70
+    },
+    {
+      "epoch": 0.14760914760914762,
+      "grad_norm": 1.3984375,
+      "learning_rate": 0.00029045643153526973,
+      "loss": 10.7781,
+      "step": 71
+    },
+    {
+      "epoch": 0.1496881496881497,
+      "grad_norm": 1.5234375,
+      "learning_rate": 0.0002946058091286307,
+      "loss": 10.7663,
+      "step": 72
+    },
+    {
+      "epoch": 0.15176715176715178,
+      "grad_norm": 1.328125,
+      "learning_rate": 0.0002987551867219917,
+      "loss": 10.7976,
+      "step": 73
+    },
+    {
+      "epoch": 0.15384615384615385,
+      "grad_norm": 4.15625,
+      "learning_rate": 0.0003029045643153527,
+      "loss": 10.5842,
+      "step": 74
+    },
+    {
+      "epoch": 0.15592515592515593,
+      "grad_norm": 1.375,
+      "learning_rate": 0.00030705394190871373,
+      "loss": 10.795,
+      "step": 75
+    },
+    {
+      "epoch": 0.158004158004158,
+      "grad_norm": 1.375,
+      "learning_rate": 0.0003112033195020747,
+      "loss": 10.791,
+      "step": 76
+    },
+    {
+      "epoch": 0.1600831600831601,
+      "grad_norm": 1.484375,
+      "learning_rate": 0.00031535269709543565,
+      "loss": 10.7756,
+      "step": 77
+    },
+    {
+      "epoch": 0.16216216216216217,
+      "grad_norm": 1.421875,
+      "learning_rate": 0.00031950207468879664,
+      "loss": 10.7924,
+      "step": 78
+    },
+    {
+      "epoch": 0.16424116424116425,
+      "grad_norm": 1.4375,
+      "learning_rate": 0.0003236514522821577,
+      "loss": 10.7854,
+      "step": 79
+    },
+    {
+      "epoch": 0.16632016632016633,
+      "grad_norm": 1.4296875,
+      "learning_rate": 0.00032780082987551867,
+      "loss": 10.7742,
+      "step": 80
+    },
+    {
+      "epoch": 0.1683991683991684,
+      "grad_norm": 1.359375,
+      "learning_rate": 0.00033195020746887966,
+      "loss": 10.7746,
+      "step": 81
+    },
+    {
+      "epoch": 0.1704781704781705,
+      "grad_norm": 1.453125,
+      "learning_rate": 0.0003360995850622407,
+      "loss": 10.755,
+      "step": 82
+    },
+    {
+      "epoch": 0.17255717255717257,
+      "grad_norm": 1.390625,
+      "learning_rate": 0.0003402489626556017,
+      "loss": 10.7675,
+      "step": 83
+    },
+    {
+      "epoch": 0.17463617463617465,
+      "grad_norm": 1.6484375,
+      "learning_rate": 0.0003443983402489627,
+      "loss": 10.7589,
+      "step": 84
+    },
+    {
+      "epoch": 0.17671517671517672,
+      "grad_norm": 1.40625,
+      "learning_rate": 0.00034854771784232366,
+      "loss": 10.7611,
+      "step": 85
+    },
+    {
+      "epoch": 0.1787941787941788,
+      "grad_norm": 1.8203125,
+      "learning_rate": 0.00035269709543568465,
+      "loss": 10.775,
+      "step": 86
+    },
+    {
+      "epoch": 0.18087318087318088,
+      "grad_norm": 1.3984375,
+      "learning_rate": 0.00035684647302904564,
+      "loss": 10.7117,
+      "step": 87
+    },
+    {
+      "epoch": 0.18295218295218296,
+      "grad_norm": 1.34375,
+      "learning_rate": 0.00036099585062240663,
+      "loss": 10.7534,
+      "step": 88
+    },
+    {
+      "epoch": 0.18503118503118504,
+      "grad_norm": 3.75,
+      "learning_rate": 0.0003651452282157676,
+      "loss": 10.7575,
+      "step": 89
+    },
+    {
+      "epoch": 0.18711018711018712,
+      "grad_norm": 1.4765625,
+      "learning_rate": 0.00036929460580912866,
+      "loss": 10.7342,
+      "step": 90
+    },
+    {
+      "epoch": 0.1891891891891892,
+      "grad_norm": 1.3671875,
+      "learning_rate": 0.00037344398340248964,
+      "loss": 10.7605,
+      "step": 91
+    },
+    {
+      "epoch": 0.19126819126819128,
+      "grad_norm": 1.40625,
+      "learning_rate": 0.00037759336099585063,
+      "loss": 10.7133,
+      "step": 92
+    },
+    {
+      "epoch": 0.19334719334719336,
+      "grad_norm": 1.40625,
+      "learning_rate": 0.0003817427385892116,
+      "loss": 10.7354,
+      "step": 93
+    },
+    {
+      "epoch": 0.19542619542619544,
+      "grad_norm": 1.3671875,
+      "learning_rate": 0.00038589211618257266,
+      "loss": 10.7689,
+      "step": 94
+    },
+    {
+      "epoch": 0.19750519750519752,
+      "grad_norm": 1.34375,
+      "learning_rate": 0.0003900414937759336,
+      "loss": 10.7537,
+      "step": 95
+    },
+    {
+      "epoch": 0.1995841995841996,
+      "grad_norm": 1.515625,
+      "learning_rate": 0.0003941908713692946,
+      "loss": 10.7218,
+      "step": 96
+    },
+    {
+      "epoch": 0.20166320166320167,
+      "grad_norm": 1.390625,
+      "learning_rate": 0.00039834024896265557,
+      "loss": 10.7424,
+      "step": 97
+    },
+    {
+      "epoch": 0.20374220374220375,
+      "grad_norm": 1.4765625,
+      "learning_rate": 0.0004024896265560166,
+      "loss": 10.7158,
+      "step": 98
+    },
+    {
+      "epoch": 0.20582120582120583,
+      "grad_norm": 1.484375,
+      "learning_rate": 0.0004066390041493776,
+      "loss": 10.7036,
+      "step": 99
+    },
+    {
+      "epoch": 0.2079002079002079,
+      "grad_norm": 1.4609375,
+      "learning_rate": 0.0004107883817427386,
+      "loss": 10.6947,
+      "step": 100
+    },
+    {
+      "epoch": 0.2079002079002079,
+      "eval_loss": 10.23933219909668,
+      "eval_runtime": 0.6087,
+      "eval_samples_per_second": 26.284,
+      "eval_steps_per_second": 3.286,
+      "step": 100
+    }
+  ],
+  "logging_steps": 1,
+  "max_steps": 4810,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 10,
+  "save_steps": 100,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 2.54723638689792e+16,
+  "train_batch_size": 32,
+  "trial_name": null,
+  "trial_params": null
+}

last-checkpoint/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2176662ce199a6498d6a867ed32a93ee009dc49982d0046d74255209c006f204
+size 5777