Upload folder using huggingface_hub
- README.md +67 -0
- all_results.json +13 -0
- config.json +105 -0
- eval_results.json +8 -0
- model.safetensors +3 -0
- preprocessor_config.json +23 -0
- runs/Mar16_18-04-43_megamax-spl-vm01/events.out.tfevents.1742128491.megamax-spl-vm01.3229474.0 +3 -0
- runs/Mar16_18-07-16_megamax-spl-vm01/events.out.tfevents.1742128639.megamax-spl-vm01.3236623.0 +3 -0
- runs/Mar16_18-07-16_megamax-spl-vm01/events.out.tfevents.1742130492.megamax-spl-vm01.3236623.1 +3 -0
- train_results.json +8 -0
- trainer_state.json +1263 -0
- training_args.bin +3 -0
README.md
ADDED
@@ -0,0 +1,67 @@
+---
+library_name: transformers
+license: apache-2.0
+base_model: google/vit-base-patch16-224-in21k
+tags:
+- image-classification
+- vision
+- generated_from_trainer
+metrics:
+- accuracy
+model-index:
+- name: beans_outputs
+  results: []
+---
+
+<!-- This model card has been generated automatically according to the information the Trainer had access to. You
+should probably proofread and complete it, then remove this comment. -->
+
+# beans_outputs
+
+This model is a fine-tuned version of [google/vit-base-patch16-224-in21k](https://huggingface.co/google/vit-base-patch16-224-in21k) on the /home/ubuntu/sdb/astitva/segmentation/classification_ds dataset.
+It achieves the following results on the evaluation set:
+- Loss: 0.8746
+- Accuracy: 0.9515
+
+## Model description
+
+More information needed
+
+## Intended uses & limitations
+
+More information needed
+
+## Training and evaluation data
+
+More information needed
+
+## Training procedure
+
+### Training hyperparameters
+
+The following hyperparameters were used during training:
+- learning_rate: 2e-05
+- train_batch_size: 8
+- eval_batch_size: 8
+- seed: 42
+- optimizer: AdamW (ADAMW_TORCH) with betas=(0.9,0.999), epsilon=1e-08, and no additional optimizer arguments
+- lr_scheduler_type: linear
+- num_epochs: 5.0
+
+### Training results
+
+| Training Loss | Epoch | Step | Validation Loss | Accuracy |
+|:-------------:|:-----:|:----:|:---------------:|:--------:|
+| 2.1775        | 1.0   | 336  | 2.1821          | 0.7616   |
+| 1.4653        | 2.0   | 672  | 1.4698          | 0.8840   |
+| 1.1052        | 3.0   | 1008 | 1.0802          | 0.9304   |
+| 1.0055        | 4.0   | 1344 | 0.9248          | 0.9494   |
+| 0.7847        | 5.0   | 1680 | 0.8746          | 0.9515   |
+
+
+### Framework versions
+
+- Transformers 4.50.0.dev0
+- Pytorch 2.6.0+cu124
+- Datasets 3.3.2
+- Tokenizers 0.21.0
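The card above omits a usage snippet. A minimal inference sketch, assuming the fine-tuned checkpoint lives in a local `./beans_outputs` directory (the output path suggested by `trainer_state.json` below; substitute the Hub repo id once pushed):

```python
# Minimal sketch: classify one image with the fine-tuned ViT checkpoint.
# "./beans_outputs" and "leaf.jpg" are placeholder paths, not confirmed by this repo.
from transformers import pipeline

classifier = pipeline("image-classification", model="./beans_outputs")
predictions = classifier("leaf.jpg")
print(predictions)  # e.g. [{'label': 'tulsi', 'score': 0.97}, ...]
```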
all_results.json
ADDED
@@ -0,0 +1,13 @@
+{
+    "epoch": 5.0,
+    "eval_accuracy": 0.9514767932489452,
+    "eval_loss": 0.8745647668838501,
+    "eval_runtime": 51.5176,
+    "eval_samples_per_second": 9.201,
+    "eval_steps_per_second": 1.165,
+    "total_flos": 1.0410532148820787e+18,
+    "train_loss": 1.5688391100792658,
+    "train_runtime": 1801.1044,
+    "train_samples_per_second": 7.457,
+    "train_steps_per_second": 0.933
+}
config.json
ADDED
@@ -0,0 +1,105 @@
+{
+  "_name_or_path": "google/vit-base-patch16-224-in21k",
+  "architectures": [
+    "ViTForImageClassification"
+  ],
+  "attention_probs_dropout_prob": 0.0,
+  "encoder_stride": 16,
+  "finetuning_task": "image-classification",
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.0,
+  "hidden_size": 768,
+  "id2label": {
+    "0": "aloevera",
+    "1": "arjun",
+    "10": "coffee",
+    "11": "coriander",
+    "12": "curry",
+    "13": "giloy",
+    "14": "ginger",
+    "15": "glochidion",
+    "16": "gotu kola",
+    "17": "hibiscus",
+    "18": "jasmine",
+    "19": "lemon",
+    "2": "ashwagandha",
+    "20": "madar",
+    "21": "mango",
+    "22": "marigold",
+    "23": "mint",
+    "24": "moringa",
+    "25": "naruneendi",
+    "26": "neem",
+    "27": "onion",
+    "28": "papaya",
+    "29": "ricinus",
+    "3": "babool",
+    "30": "rose",
+    "31": "sarpagandha",
+    "32": "shatavari",
+    "33": "stereoserpum",
+    "34": "tomato",
+    "35": "tulsi",
+    "36": "turmeric",
+    "37": "wedelia",
+    "4": "bael",
+    "5": "bakuchi",
+    "6": "barberry",
+    "7": "bhilawa",
+    "8": "bhringraj",
+    "9": "chilly"
+  },
+  "image_size": 224,
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "label2id": {
+    "aloevera": "0",
+    "arjun": "1",
+    "ashwagandha": "2",
+    "babool": "3",
+    "bael": "4",
+    "bakuchi": "5",
+    "barberry": "6",
+    "bhilawa": "7",
+    "bhringraj": "8",
+    "chilly": "9",
+    "coffee": "10",
+    "coriander": "11",
+    "curry": "12",
+    "giloy": "13",
+    "ginger": "14",
+    "glochidion": "15",
+    "gotu kola": "16",
+    "hibiscus": "17",
+    "jasmine": "18",
+    "lemon": "19",
+    "madar": "20",
+    "mango": "21",
+    "marigold": "22",
+    "mint": "23",
+    "moringa": "24",
+    "naruneendi": "25",
+    "neem": "26",
+    "onion": "27",
+    "papaya": "28",
+    "ricinus": "29",
+    "rose": "30",
+    "sarpagandha": "31",
+    "shatavari": "32",
+    "stereoserpum": "33",
+    "tomato": "34",
+    "tulsi": "35",
+    "turmeric": "36",
+    "wedelia": "37"
+  },
+  "layer_norm_eps": 1e-12,
+  "model_type": "vit",
+  "num_attention_heads": 12,
+  "num_channels": 3,
+  "num_hidden_layers": 12,
+  "patch_size": 16,
+  "problem_type": "single_label_classification",
+  "qkv_bias": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.50.0.dev0"
+}
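The `id2label` map above is what turns a logit index into a class name at inference time. A sketch of that lookup, under the same placeholder-path assumption as the snippet above (note that `from_pretrained` converts the string keys in `config.json` to integers):

```python
# Sketch: map the argmax logit to a label via config.id2label.
import torch
from PIL import Image
from transformers import ViTForImageClassification, ViTImageProcessor

model = ViTForImageClassification.from_pretrained("./beans_outputs")  # placeholder path
processor = ViTImageProcessor.from_pretrained("./beans_outputs")

image = Image.open("leaf.jpg").convert("RGB")  # placeholder image
inputs = processor(images=image, return_tensors="pt")

with torch.no_grad():
    logits = model(**inputs).logits  # shape (1, 38): one score per class

predicted_id = logits.argmax(-1).item()
print(model.config.id2label[predicted_id])
```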
eval_results.json
ADDED
@@ -0,0 +1,8 @@
+{
+    "epoch": 5.0,
+    "eval_accuracy": 0.9514767932489452,
+    "eval_loss": 0.8745647668838501,
+    "eval_runtime": 51.5176,
+    "eval_samples_per_second": 9.201,
+    "eval_steps_per_second": 1.165
+}
model.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:405aae6925ebb7690e4fc69bb192018b73635b0d76ba5fef4ea3dba7998a9b01
+size 343334720
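The three lines above are a Git LFS pointer, not the weights themselves; `git lfs pull` fetches the real 343 MB file. A small sketch for verifying a downloaded copy against the pointer's `oid` digest:

```python
# Sketch: check that the fetched model.safetensors matches the LFS pointer's sha256.
import hashlib

h = hashlib.sha256()
with open("model.safetensors", "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):  # read in 1 MiB chunks
        h.update(chunk)
print(h.hexdigest() == "405aae6925ebb7690e4fc69bb192018b73635b0d76ba5fef4ea3dba7998a9b01")
```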
preprocessor_config.json
ADDED
@@ -0,0 +1,23 @@
+{
+  "do_convert_rgb": null,
+  "do_normalize": true,
+  "do_rescale": true,
+  "do_resize": true,
+  "image_mean": [
+    0.5,
+    0.5,
+    0.5
+  ],
+  "image_processor_type": "ViTImageProcessor",
+  "image_std": [
+    0.5,
+    0.5,
+    0.5
+  ],
+  "resample": 2,
+  "rescale_factor": 0.00392156862745098,
+  "size": {
+    "height": 224,
+    "width": 224
+  }
+}
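These settings resize each image to 224x224 (resample 2 is PIL bilinear), rescale pixels by 1/255 (0.00392...), then normalize each channel with mean and std 0.5, i.e. map [0, 255] to roughly [-1, 1]. A sketch, again assuming the placeholder checkpoint path:

```python
# Sketch: the processor built from this config yields (1, 3, 224, 224) tensors in ~[-1, 1].
from PIL import Image
from transformers import ViTImageProcessor

processor = ViTImageProcessor.from_pretrained("./beans_outputs")  # placeholder path
image = Image.open("leaf.jpg").convert("RGB")                     # placeholder image

pixel_values = processor(images=image, return_tensors="pt").pixel_values
print(pixel_values.shape)                      # torch.Size([1, 3, 224, 224])
print(pixel_values.min(), pixel_values.max())  # roughly -1.0 and 1.0
```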
runs/Mar16_18-04-43_megamax-spl-vm01/events.out.tfevents.1742128491.megamax-spl-vm01.3229474.0
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fc49302704f3016139389073f1fd079d7f7097dcc4196b717235d4943c8adf22
+size 7663
runs/Mar16_18-07-16_megamax-spl-vm01/events.out.tfevents.1742128639.megamax-spl-vm01.3236623.0
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b8a4193d1d23f9ead30fac7487299d057dceeb75b5f0548eb3f7aeaa15c2b795
+size 43996
runs/Mar16_18-07-16_megamax-spl-vm01/events.out.tfevents.1742130492.megamax-spl-vm01.3236623.1
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:40a8bff43120b59a5c0964a06324ad8fabed2569bbce79d2ddd4215bc8308dce
+size 411
train_results.json
ADDED
@@ -0,0 +1,8 @@
+{
+    "epoch": 5.0,
+    "total_flos": 1.0410532148820787e+18,
+    "train_loss": 1.5688391100792658,
+    "train_runtime": 1801.1044,
+    "train_samples_per_second": 7.457,
+    "train_steps_per_second": 0.933
+}
trainer_state.json
ADDED
@@ -0,0 +1,1263 @@
+{
+  "best_metric": 0.8745647668838501,
+  "best_model_checkpoint": "./beans_outputs/checkpoint-1680",
+  "epoch": 5.0,
+  "eval_steps": 500,
+  "global_step": 1680,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.02976190476190476,
+      "grad_norm": 3.170905113220215,
+      "learning_rate": 1.9880952380952384e-05,
+      "loss": 3.6048,
+      "step": 10
+    },
+    {
+      "epoch": 0.05952380952380952,
+      "grad_norm": 2.51796555519104,
+      "learning_rate": 1.9761904761904763e-05,
+      "loss": 3.5551,
+      "step": 20
+    },
+    {
+      "epoch": 0.08928571428571429,
+      "grad_norm": 2.868872880935669,
+      "learning_rate": 1.9642857142857145e-05,
+      "loss": 3.4868,
+      "step": 30
+    },
+    {
+      "epoch": 0.11904761904761904,
+      "grad_norm": 3.2086801528930664,
+      "learning_rate": 1.9523809523809524e-05,
+      "loss": 3.4105,
+      "step": 40
+    },
+    {
+      "epoch": 0.1488095238095238,
+      "grad_norm": 2.825397253036499,
+      "learning_rate": 1.9404761904761906e-05,
+      "loss": 3.3538,
+      "step": 50
+    },
+    {
+      "epoch": 0.17857142857142858,
+      "grad_norm": 3.486938238143921,
+      "learning_rate": 1.928571428571429e-05,
+      "loss": 3.3201,
+      "step": 60
+    },
+    {
+      "epoch": 0.20833333333333334,
+      "grad_norm": 2.802475929260254,
+      "learning_rate": 1.916666666666667e-05,
+      "loss": 3.2432,
+      "step": 70
+    },
+    {
+      "epoch": 0.23809523809523808,
+      "grad_norm": 2.789459228515625,
+      "learning_rate": 1.904761904761905e-05,
+      "loss": 3.2041,
+      "step": 80
+    },
+    {
+      "epoch": 0.26785714285714285,
+      "grad_norm": 3.008307933807373,
+      "learning_rate": 1.892857142857143e-05,
+      "loss": 3.1679,
+      "step": 90
+    },
+    {
+      "epoch": 0.2976190476190476,
+      "grad_norm": 2.6487619876861572,
+      "learning_rate": 1.880952380952381e-05,
+      "loss": 3.1249,
+      "step": 100
+    },
+    {
+      "epoch": 0.3273809523809524,
+      "grad_norm": 2.947179079055786,
+      "learning_rate": 1.8690476190476193e-05,
+      "loss": 3.0909,
+      "step": 110
+    },
+    {
+      "epoch": 0.35714285714285715,
+      "grad_norm": 3.1243131160736084,
+      "learning_rate": 1.8571428571428575e-05,
+      "loss": 3.0953,
+      "step": 120
+    },
+    {
+      "epoch": 0.3869047619047619,
+      "grad_norm": 2.9400837421417236,
+      "learning_rate": 1.8452380952380954e-05,
+      "loss": 2.9629,
+      "step": 130
+    },
+    {
+      "epoch": 0.4166666666666667,
+      "grad_norm": 2.7061338424682617,
+      "learning_rate": 1.8333333333333333e-05,
+      "loss": 2.9307,
+      "step": 140
+    },
+    {
+      "epoch": 0.44642857142857145,
+      "grad_norm": 2.6359243392944336,
+      "learning_rate": 1.8214285714285715e-05,
+      "loss": 2.8238,
+      "step": 150
+    },
+    {
+      "epoch": 0.47619047619047616,
+      "grad_norm": 2.740408420562744,
+      "learning_rate": 1.8095238095238097e-05,
+      "loss": 2.8961,
+      "step": 160
+    },
+    {
+      "epoch": 0.5059523809523809,
+      "grad_norm": 2.858968496322632,
+      "learning_rate": 1.797619047619048e-05,
+      "loss": 2.7505,
+      "step": 170
+    },
+    {
+      "epoch": 0.5357142857142857,
+      "grad_norm": 2.7578256130218506,
+      "learning_rate": 1.785714285714286e-05,
+      "loss": 2.7989,
+      "step": 180
+    },
+    {
+      "epoch": 0.5654761904761905,
+      "grad_norm": 2.9766931533813477,
+      "learning_rate": 1.7738095238095237e-05,
+      "loss": 2.6722,
+      "step": 190
+    },
+    {
+      "epoch": 0.5952380952380952,
+      "grad_norm": 2.7900352478027344,
+      "learning_rate": 1.761904761904762e-05,
+      "loss": 2.7213,
+      "step": 200
+    },
+    {
+      "epoch": 0.625,
+      "grad_norm": 3.004939556121826,
+      "learning_rate": 1.7500000000000002e-05,
+      "loss": 2.7287,
+      "step": 210
+    },
+    {
+      "epoch": 0.6547619047619048,
+      "grad_norm": 2.7375917434692383,
+      "learning_rate": 1.7380952380952384e-05,
+      "loss": 2.6691,
+      "step": 220
+    },
+    {
+      "epoch": 0.6845238095238095,
+      "grad_norm": 3.2530713081359863,
+      "learning_rate": 1.7261904761904763e-05,
+      "loss": 2.5742,
+      "step": 230
+    },
+    {
+      "epoch": 0.7142857142857143,
+      "grad_norm": 3.0463545322418213,
+      "learning_rate": 1.7142857142857142e-05,
+      "loss": 2.4523,
+      "step": 240
+    },
+    {
+      "epoch": 0.7440476190476191,
+      "grad_norm": 3.0471720695495605,
+      "learning_rate": 1.7023809523809524e-05,
+      "loss": 2.4592,
+      "step": 250
+    },
+    {
+      "epoch": 0.7738095238095238,
+      "grad_norm": 3.4415907859802246,
+      "learning_rate": 1.6904761904761906e-05,
+      "loss": 2.4316,
+      "step": 260
+    },
+    {
+      "epoch": 0.8035714285714286,
+      "grad_norm": 2.830673933029175,
+      "learning_rate": 1.678571428571429e-05,
+      "loss": 2.3903,
+      "step": 270
+    },
+    {
+      "epoch": 0.8333333333333334,
+      "grad_norm": 3.584303617477417,
+      "learning_rate": 1.6666666666666667e-05,
+      "loss": 2.4643,
+      "step": 280
+    },
+    {
+      "epoch": 0.8630952380952381,
+      "grad_norm": 3.9748589992523193,
+      "learning_rate": 1.6547619047619046e-05,
+      "loss": 2.3237,
+      "step": 290
+    },
+    {
+      "epoch": 0.8928571428571429,
+      "grad_norm": 2.929922103881836,
+      "learning_rate": 1.642857142857143e-05,
+      "loss": 2.2639,
+      "step": 300
+    },
+    {
+      "epoch": 0.9226190476190477,
+      "grad_norm": 4.647745132446289,
+      "learning_rate": 1.630952380952381e-05,
+      "loss": 2.4637,
+      "step": 310
+    },
+    {
+      "epoch": 0.9523809523809523,
+      "grad_norm": 3.6543118953704834,
+      "learning_rate": 1.6190476190476193e-05,
+      "loss": 2.2519,
+      "step": 320
+    },
+    {
+      "epoch": 0.9821428571428571,
+      "grad_norm": 3.3143322467803955,
+      "learning_rate": 1.6071428571428572e-05,
+      "loss": 2.1775,
+      "step": 330
+    },
+    {
+      "epoch": 1.0,
+      "eval_accuracy": 0.7616033755274262,
+      "eval_loss": 2.1820600032806396,
+      "eval_runtime": 50.7645,
+      "eval_samples_per_second": 9.337,
+      "eval_steps_per_second": 1.182,
+      "step": 336
+    },
+    {
+      "epoch": 1.0119047619047619,
+      "grad_norm": 3.666236639022827,
+      "learning_rate": 1.5952380952380954e-05,
+      "loss": 2.1187,
+      "step": 340
+    },
+    {
+      "epoch": 1.0416666666666667,
+      "grad_norm": 3.736830472946167,
+      "learning_rate": 1.5833333333333333e-05,
+      "loss": 2.1312,
+      "step": 350
+    },
+    {
+      "epoch": 1.0714285714285714,
+      "grad_norm": 3.002455711364746,
+      "learning_rate": 1.5714285714285715e-05,
+      "loss": 2.2274,
+      "step": 360
+    },
+    {
+      "epoch": 1.1011904761904763,
+      "grad_norm": 3.2685108184814453,
+      "learning_rate": 1.5595238095238098e-05,
+      "loss": 2.1347,
+      "step": 370
+    },
+    {
+      "epoch": 1.130952380952381,
+      "grad_norm": 3.4998621940612793,
+      "learning_rate": 1.5476190476190476e-05,
+      "loss": 2.0757,
+      "step": 380
+    },
+    {
+      "epoch": 1.1607142857142858,
+      "grad_norm": 3.306267738342285,
+      "learning_rate": 1.535714285714286e-05,
+      "loss": 2.0177,
+      "step": 390
+    },
+    {
+      "epoch": 1.1904761904761905,
+      "grad_norm": 3.8774032592773438,
+      "learning_rate": 1.523809523809524e-05,
+      "loss": 1.9748,
+      "step": 400
+    },
+    {
+      "epoch": 1.2202380952380953,
+      "grad_norm": 2.662797212600708,
+      "learning_rate": 1.511904761904762e-05,
+      "loss": 1.9628,
+      "step": 410
+    },
+    {
+      "epoch": 1.25,
+      "grad_norm": 3.9353742599487305,
+      "learning_rate": 1.5000000000000002e-05,
+      "loss": 2.0104,
+      "step": 420
+    },
+    {
+      "epoch": 1.2797619047619047,
+      "grad_norm": 3.3460521697998047,
+      "learning_rate": 1.4880952380952383e-05,
+      "loss": 2.0678,
+      "step": 430
+    },
+    {
+      "epoch": 1.3095238095238095,
+      "grad_norm": 3.0211353302001953,
+      "learning_rate": 1.4761904761904763e-05,
+      "loss": 2.0294,
+      "step": 440
+    },
+    {
+      "epoch": 1.3392857142857144,
+      "grad_norm": 2.827756404876709,
+      "learning_rate": 1.4642857142857144e-05,
+      "loss": 1.9104,
+      "step": 450
+    },
+    {
+      "epoch": 1.369047619047619,
+      "grad_norm": 2.606844663619995,
+      "learning_rate": 1.4523809523809524e-05,
+      "loss": 1.933,
+      "step": 460
+    },
+    {
+      "epoch": 1.3988095238095237,
+      "grad_norm": 3.994950294494629,
+      "learning_rate": 1.4404761904761907e-05,
+      "loss": 1.9977,
+      "step": 470
+    },
+    {
+      "epoch": 1.4285714285714286,
+      "grad_norm": 3.6433207988739014,
+      "learning_rate": 1.4285714285714287e-05,
+      "loss": 1.897,
+      "step": 480
+    },
+    {
+      "epoch": 1.4583333333333333,
+      "grad_norm": 3.1899826526641846,
+      "learning_rate": 1.416666666666667e-05,
+      "loss": 1.9046,
+      "step": 490
+    },
+    {
+      "epoch": 1.4880952380952381,
+      "grad_norm": 3.352928638458252,
+      "learning_rate": 1.4047619047619048e-05,
+      "loss": 1.7378,
+      "step": 500
+    },
+    {
+      "epoch": 1.5178571428571428,
+      "grad_norm": 4.73577880859375,
+      "learning_rate": 1.3928571428571429e-05,
+      "loss": 1.7998,
+      "step": 510
+    },
+    {
+      "epoch": 1.5476190476190477,
+      "grad_norm": 3.118739366531372,
+      "learning_rate": 1.3809523809523811e-05,
+      "loss": 1.7316,
+      "step": 520
+    },
+    {
+      "epoch": 1.5773809523809523,
+      "grad_norm": 2.617877721786499,
+      "learning_rate": 1.3690476190476192e-05,
+      "loss": 1.6478,
+      "step": 530
+    },
+    {
+      "epoch": 1.6071428571428572,
+      "grad_norm": 3.3894600868225098,
+      "learning_rate": 1.3571428571428574e-05,
+      "loss": 1.7311,
+      "step": 540
+    },
+    {
+      "epoch": 1.6369047619047619,
+      "grad_norm": 4.088054656982422,
+      "learning_rate": 1.3452380952380954e-05,
+      "loss": 1.5008,
+      "step": 550
+    },
+    {
+      "epoch": 1.6666666666666665,
+      "grad_norm": 3.2209737300872803,
+      "learning_rate": 1.3333333333333333e-05,
+      "loss": 1.6994,
+      "step": 560
+    },
+    {
+      "epoch": 1.6964285714285714,
+      "grad_norm": 3.8286681175231934,
+      "learning_rate": 1.3214285714285716e-05,
+      "loss": 1.6879,
+      "step": 570
+    },
+    {
+      "epoch": 1.7261904761904763,
+      "grad_norm": 2.611720561981201,
+      "learning_rate": 1.3095238095238096e-05,
+      "loss": 1.6061,
+      "step": 580
+    },
+    {
+      "epoch": 1.755952380952381,
+      "grad_norm": 2.898097276687622,
+      "learning_rate": 1.2976190476190478e-05,
+      "loss": 1.5223,
+      "step": 590
+    },
+    {
+      "epoch": 1.7857142857142856,
+      "grad_norm": 2.2522895336151123,
+      "learning_rate": 1.2857142857142859e-05,
+      "loss": 1.5095,
+      "step": 600
+    },
+    {
+      "epoch": 1.8154761904761905,
+      "grad_norm": 3.5610804557800293,
+      "learning_rate": 1.2738095238095238e-05,
+      "loss": 1.6524,
+      "step": 610
+    },
+    {
+      "epoch": 1.8452380952380953,
+      "grad_norm": 3.532130002975464,
+      "learning_rate": 1.261904761904762e-05,
+      "loss": 1.5345,
+      "step": 620
+    },
+    {
+      "epoch": 1.875,
+      "grad_norm": 3.8648953437805176,
+      "learning_rate": 1.25e-05,
+      "loss": 1.691,
+      "step": 630
+    },
+    {
+      "epoch": 1.9047619047619047,
+      "grad_norm": 2.4936046600341797,
+      "learning_rate": 1.2380952380952383e-05,
+      "loss": 1.4573,
+      "step": 640
+    },
+    {
+      "epoch": 1.9345238095238095,
+      "grad_norm": 3.499699592590332,
+      "learning_rate": 1.2261904761904763e-05,
+      "loss": 1.5181,
+      "step": 650
+    },
+    {
+      "epoch": 1.9642857142857144,
+      "grad_norm": 2.7815959453582764,
+      "learning_rate": 1.2142857142857142e-05,
+      "loss": 1.4333,
+      "step": 660
+    },
+    {
+      "epoch": 1.994047619047619,
+      "grad_norm": 3.007183790206909,
+      "learning_rate": 1.2023809523809525e-05,
+      "loss": 1.4653,
+      "step": 670
+    },
+    {
+      "epoch": 2.0,
+      "eval_accuracy": 0.8839662447257384,
+      "eval_loss": 1.4698303937911987,
+      "eval_runtime": 51.4369,
+      "eval_samples_per_second": 9.215,
+      "eval_steps_per_second": 1.166,
+      "step": 672
+    },
+    {
+      "epoch": 2.0238095238095237,
+      "grad_norm": 3.4663267135620117,
+      "learning_rate": 1.1904761904761905e-05,
+      "loss": 1.4428,
+      "step": 680
+    },
+    {
+      "epoch": 2.0535714285714284,
+      "grad_norm": 2.2934768199920654,
+      "learning_rate": 1.1785714285714287e-05,
+      "loss": 1.4135,
+      "step": 690
+    },
+    {
+      "epoch": 2.0833333333333335,
+      "grad_norm": 2.601954221725464,
+      "learning_rate": 1.1666666666666668e-05,
+      "loss": 1.456,
+      "step": 700
+    },
+    {
+      "epoch": 2.113095238095238,
+      "grad_norm": 3.2254600524902344,
+      "learning_rate": 1.1547619047619047e-05,
+      "loss": 1.5227,
+      "step": 710
+    },
+    {
+      "epoch": 2.142857142857143,
+      "grad_norm": 3.2958316802978516,
+      "learning_rate": 1.1428571428571429e-05,
+      "loss": 1.4248,
+      "step": 720
+    },
+    {
+      "epoch": 2.1726190476190474,
+      "grad_norm": 4.993536472320557,
+      "learning_rate": 1.130952380952381e-05,
+      "loss": 1.4717,
+      "step": 730
+    },
+    {
+      "epoch": 2.2023809523809526,
+      "grad_norm": 3.3640084266662598,
+      "learning_rate": 1.1190476190476192e-05,
+      "loss": 1.4265,
+      "step": 740
+    },
+    {
+      "epoch": 2.232142857142857,
+      "grad_norm": 2.6835250854492188,
+      "learning_rate": 1.1071428571428572e-05,
+      "loss": 1.408,
+      "step": 750
+    },
+    {
+      "epoch": 2.261904761904762,
+      "grad_norm": 3.8518381118774414,
+      "learning_rate": 1.0952380952380955e-05,
+      "loss": 1.2666,
+      "step": 760
+    },
+    {
+      "epoch": 2.2916666666666665,
+      "grad_norm": 3.553366184234619,
+      "learning_rate": 1.0833333333333334e-05,
+      "loss": 1.4052,
+      "step": 770
+    },
+    {
+      "epoch": 2.3214285714285716,
+      "grad_norm": 2.657440423965454,
+      "learning_rate": 1.0714285714285714e-05,
+      "loss": 1.3953,
+      "step": 780
+    },
+    {
+      "epoch": 2.3511904761904763,
+      "grad_norm": 4.050617694854736,
+      "learning_rate": 1.0595238095238096e-05,
+      "loss": 1.3073,
+      "step": 790
+    },
+    {
+      "epoch": 2.380952380952381,
+      "grad_norm": 3.039287567138672,
+      "learning_rate": 1.0476190476190477e-05,
+      "loss": 1.3765,
+      "step": 800
+    },
+    {
+      "epoch": 2.4107142857142856,
+      "grad_norm": 3.350076913833618,
+      "learning_rate": 1.0357142857142859e-05,
+      "loss": 1.2713,
+      "step": 810
+    },
+    {
+      "epoch": 2.4404761904761907,
+      "grad_norm": 4.112967491149902,
+      "learning_rate": 1.0238095238095238e-05,
+      "loss": 1.3557,
+      "step": 820
+    },
+    {
+      "epoch": 2.4702380952380953,
+      "grad_norm": 2.587895154953003,
+      "learning_rate": 1.011904761904762e-05,
+      "loss": 1.2106,
+      "step": 830
+    },
+    {
+      "epoch": 2.5,
+      "grad_norm": 2.2189221382141113,
+      "learning_rate": 1e-05,
+      "loss": 1.1529,
+      "step": 840
+    },
+    {
+      "epoch": 2.5297619047619047,
+      "grad_norm": 1.7763313055038452,
+      "learning_rate": 9.880952380952381e-06,
+      "loss": 1.2066,
+      "step": 850
+    },
+    {
+      "epoch": 2.5595238095238093,
+      "grad_norm": 2.5652577877044678,
+      "learning_rate": 9.761904761904762e-06,
+      "loss": 1.2206,
+      "step": 860
+    },
+    {
+      "epoch": 2.5892857142857144,
+      "grad_norm": 2.4081642627716064,
+      "learning_rate": 9.642857142857144e-06,
+      "loss": 1.2288,
+      "step": 870
+    },
+    {
+      "epoch": 2.619047619047619,
+      "grad_norm": 3.4448933601379395,
+      "learning_rate": 9.523809523809525e-06,
+      "loss": 1.2764,
+      "step": 880
+    },
+    {
+      "epoch": 2.6488095238095237,
+      "grad_norm": 3.65535044670105,
+      "learning_rate": 9.404761904761905e-06,
+      "loss": 1.1818,
+      "step": 890
+    },
+    {
+      "epoch": 2.678571428571429,
+      "grad_norm": 2.902886152267456,
+      "learning_rate": 9.285714285714288e-06,
+      "loss": 1.2662,
+      "step": 900
+    },
+    {
+      "epoch": 2.7083333333333335,
+      "grad_norm": 2.8251378536224365,
+      "learning_rate": 9.166666666666666e-06,
+      "loss": 1.1246,
+      "step": 910
+    },
+    {
+      "epoch": 2.738095238095238,
+      "grad_norm": 2.1443264484405518,
+      "learning_rate": 9.047619047619049e-06,
+      "loss": 1.2486,
+      "step": 920
+    },
+    {
+      "epoch": 2.767857142857143,
+      "grad_norm": 4.930934429168701,
+      "learning_rate": 8.92857142857143e-06,
+      "loss": 1.1865,
+      "step": 930
+    },
+    {
+      "epoch": 2.7976190476190474,
+      "grad_norm": 3.2018985748291016,
+      "learning_rate": 8.80952380952381e-06,
+      "loss": 1.1047,
+      "step": 940
+    },
+    {
+      "epoch": 2.8273809523809526,
+      "grad_norm": 3.2998268604278564,
+      "learning_rate": 8.690476190476192e-06,
+      "loss": 1.2098,
+      "step": 950
+    },
+    {
+      "epoch": 2.857142857142857,
+      "grad_norm": 2.1316542625427246,
+      "learning_rate": 8.571428571428571e-06,
+      "loss": 1.0918,
+      "step": 960
+    },
+    {
+      "epoch": 2.886904761904762,
+      "grad_norm": 3.8014087677001953,
+      "learning_rate": 8.452380952380953e-06,
+      "loss": 1.1139,
+      "step": 970
+    },
+    {
+      "epoch": 2.9166666666666665,
+      "grad_norm": 2.8320999145507812,
+      "learning_rate": 8.333333333333334e-06,
+      "loss": 1.213,
+      "step": 980
+    },
+    {
+      "epoch": 2.946428571428571,
+      "grad_norm": 3.016481876373291,
+      "learning_rate": 8.214285714285714e-06,
+      "loss": 1.1398,
+      "step": 990
+    },
+    {
+      "epoch": 2.9761904761904763,
+      "grad_norm": 3.9006187915802,
+      "learning_rate": 8.095238095238097e-06,
+      "loss": 1.1052,
+      "step": 1000
+    },
+    {
+      "epoch": 3.0,
+      "eval_accuracy": 0.930379746835443,
+      "eval_loss": 1.0801581144332886,
+      "eval_runtime": 51.0077,
+      "eval_samples_per_second": 9.293,
+      "eval_steps_per_second": 1.176,
+      "step": 1008
+    },
+    {
+      "epoch": 3.005952380952381,
+      "grad_norm": 2.796464204788208,
+      "learning_rate": 7.976190476190477e-06,
+      "loss": 1.1341,
+      "step": 1010
+    },
+    {
+      "epoch": 3.0357142857142856,
+      "grad_norm": 2.1846368312835693,
+      "learning_rate": 7.857142857142858e-06,
+      "loss": 1.173,
+      "step": 1020
+    },
+    {
+      "epoch": 3.0654761904761907,
+      "grad_norm": 3.3909096717834473,
+      "learning_rate": 7.738095238095238e-06,
+      "loss": 1.0198,
+      "step": 1030
+    },
+    {
+      "epoch": 3.0952380952380953,
+      "grad_norm": 3.5887138843536377,
+      "learning_rate": 7.61904761904762e-06,
+      "loss": 1.0729,
+      "step": 1040
+    },
+    {
+      "epoch": 3.125,
+      "grad_norm": 2.7871737480163574,
+      "learning_rate": 7.500000000000001e-06,
+      "loss": 0.9676,
+      "step": 1050
+    },
+    {
+      "epoch": 3.1547619047619047,
+      "grad_norm": 3.3368754386901855,
+      "learning_rate": 7.380952380952382e-06,
+      "loss": 0.9599,
+      "step": 1060
+    },
+    {
+      "epoch": 3.1845238095238093,
+      "grad_norm": 3.748992919921875,
+      "learning_rate": 7.261904761904762e-06,
+      "loss": 1.1599,
+      "step": 1070
+    },
+    {
+      "epoch": 3.2142857142857144,
+      "grad_norm": 4.470694065093994,
+      "learning_rate": 7.1428571428571436e-06,
+      "loss": 1.155,
+      "step": 1080
+    },
+    {
+      "epoch": 3.244047619047619,
+      "grad_norm": 1.8315823078155518,
+      "learning_rate": 7.023809523809524e-06,
+      "loss": 0.979,
+      "step": 1090
+    },
+    {
+      "epoch": 3.2738095238095237,
+      "grad_norm": 2.505209445953369,
+      "learning_rate": 6.9047619047619055e-06,
+      "loss": 1.142,
+      "step": 1100
+    },
+    {
+      "epoch": 3.3035714285714284,
+      "grad_norm": 3.056353807449341,
+      "learning_rate": 6.785714285714287e-06,
+      "loss": 1.0072,
+      "step": 1110
+    },
+    {
+      "epoch": 3.3333333333333335,
+      "grad_norm": 3.9302310943603516,
+      "learning_rate": 6.666666666666667e-06,
+      "loss": 1.0705,
+      "step": 1120
+    },
+    {
+      "epoch": 3.363095238095238,
+      "grad_norm": 4.6520490646362305,
+      "learning_rate": 6.547619047619048e-06,
+      "loss": 1.0325,
+      "step": 1130
+    },
+    {
+      "epoch": 3.392857142857143,
+      "grad_norm": 3.9381701946258545,
+      "learning_rate": 6.4285714285714295e-06,
+      "loss": 0.9674,
+      "step": 1140
+    },
+    {
+      "epoch": 3.4226190476190474,
+      "grad_norm": 5.080965042114258,
+      "learning_rate": 6.30952380952381e-06,
+      "loss": 0.9812,
+      "step": 1150
+    },
+    {
+      "epoch": 3.4523809523809526,
+      "grad_norm": 4.649317264556885,
+      "learning_rate": 6.1904761904761914e-06,
+      "loss": 1.1093,
+      "step": 1160
+    },
+    {
+      "epoch": 3.482142857142857,
+      "grad_norm": 5.5956315994262695,
+      "learning_rate": 6.071428571428571e-06,
+      "loss": 1.0133,
+      "step": 1170
+    },
+    {
+      "epoch": 3.511904761904762,
+      "grad_norm": 4.99602746963501,
+      "learning_rate": 5.9523809523809525e-06,
+      "loss": 1.075,
+      "step": 1180
+    },
+    {
+      "epoch": 3.5416666666666665,
+      "grad_norm": 3.875300407409668,
+      "learning_rate": 5.833333333333334e-06,
+      "loss": 1.1469,
+      "step": 1190
+    },
+    {
+      "epoch": 3.571428571428571,
+      "grad_norm": 2.9351279735565186,
+      "learning_rate": 5.7142857142857145e-06,
+      "loss": 1.1746,
+      "step": 1200
+    },
+    {
+      "epoch": 3.6011904761904763,
+      "grad_norm": 3.581909418106079,
+      "learning_rate": 5.595238095238096e-06,
+      "loss": 1.0452,
+      "step": 1210
+    },
+    {
+      "epoch": 3.630952380952381,
+      "grad_norm": 2.4383697509765625,
+      "learning_rate": 5.476190476190477e-06,
+      "loss": 0.884,
+      "step": 1220
+    },
+    {
+      "epoch": 3.6607142857142856,
+      "grad_norm": 3.386600971221924,
+      "learning_rate": 5.357142857142857e-06,
+      "loss": 0.9479,
+      "step": 1230
+    },
+    {
+      "epoch": 3.6904761904761907,
+      "grad_norm": 1.5890535116195679,
+      "learning_rate": 5.2380952380952384e-06,
+      "loss": 0.8953,
+      "step": 1240
+    },
+    {
+      "epoch": 3.7202380952380953,
+      "grad_norm": 2.729491710662842,
+      "learning_rate": 5.119047619047619e-06,
+      "loss": 0.9071,
+      "step": 1250
+    },
+    {
+      "epoch": 3.75,
+      "grad_norm": 4.265748977661133,
+      "learning_rate": 5e-06,
+      "loss": 1.0496,
+      "step": 1260
+    },
+    {
+      "epoch": 3.7797619047619047,
+      "grad_norm": 3.6234512329101562,
+      "learning_rate": 4.880952380952381e-06,
+      "loss": 0.9945,
+      "step": 1270
+    },
+    {
+      "epoch": 3.8095238095238093,
+      "grad_norm": 3.0296449661254883,
+      "learning_rate": 4.761904761904762e-06,
+      "loss": 1.0592,
+      "step": 1280
+    },
+    {
+      "epoch": 3.8392857142857144,
+      "grad_norm": 3.7550673484802246,
+      "learning_rate": 4.642857142857144e-06,
+      "loss": 0.9102,
+      "step": 1290
+    },
+    {
+      "epoch": 3.869047619047619,
+      "grad_norm": 2.3732712268829346,
+      "learning_rate": 4.523809523809524e-06,
+      "loss": 0.9721,
+      "step": 1300
+    },
+    {
+      "epoch": 3.8988095238095237,
+      "grad_norm": 4.049142360687256,
+      "learning_rate": 4.404761904761905e-06,
+      "loss": 0.9409,
+      "step": 1310
+    },
+    {
+      "epoch": 3.928571428571429,
+      "grad_norm": 2.1877949237823486,
+      "learning_rate": 4.2857142857142855e-06,
+      "loss": 1.0235,
+      "step": 1320
+    },
+    {
+      "epoch": 3.9583333333333335,
+      "grad_norm": 1.8449411392211914,
+      "learning_rate": 4.166666666666667e-06,
+      "loss": 0.978,
+      "step": 1330
+    },
+    {
+      "epoch": 3.988095238095238,
+      "grad_norm": 2.8841190338134766,
+      "learning_rate": 4.047619047619048e-06,
+      "loss": 1.0055,
+      "step": 1340
+    },
+    {
+      "epoch": 4.0,
+      "eval_accuracy": 0.9493670886075949,
+      "eval_loss": 0.9248189926147461,
+      "eval_runtime": 51.0423,
+      "eval_samples_per_second": 9.286,
+      "eval_steps_per_second": 1.175,
+      "step": 1344
+    },
+    {
+      "epoch": 4.017857142857143,
+      "grad_norm": 2.242076873779297,
+      "learning_rate": 3.928571428571429e-06,
+      "loss": 0.9244,
+      "step": 1350
+    },
+    {
+      "epoch": 4.0476190476190474,
+      "grad_norm": 1.98090660572052,
+      "learning_rate": 3.80952380952381e-06,
+      "loss": 0.8568,
+      "step": 1360
+    },
+    {
+      "epoch": 4.0773809523809526,
+      "grad_norm": 3.927706718444824,
+      "learning_rate": 3.690476190476191e-06,
+      "loss": 0.9644,
+      "step": 1370
+    },
+    {
+      "epoch": 4.107142857142857,
+      "grad_norm": 2.3780994415283203,
+      "learning_rate": 3.5714285714285718e-06,
+      "loss": 0.97,
+      "step": 1380
+    },
+    {
+      "epoch": 4.136904761904762,
+      "grad_norm": 2.21608304977417,
+      "learning_rate": 3.4523809523809528e-06,
+      "loss": 0.9728,
+      "step": 1390
+    },
+    {
+      "epoch": 4.166666666666667,
+      "grad_norm": 6.764073848724365,
+      "learning_rate": 3.3333333333333333e-06,
+      "loss": 0.8729,
+      "step": 1400
+    },
+    {
+      "epoch": 4.196428571428571,
+      "grad_norm": 1.5746071338653564,
+      "learning_rate": 3.2142857142857147e-06,
+      "loss": 0.7702,
+      "step": 1410
+    },
+    {
+      "epoch": 4.226190476190476,
+      "grad_norm": 1.8241825103759766,
+      "learning_rate": 3.0952380952380957e-06,
+      "loss": 0.9121,
+      "step": 1420
+    },
+    {
+      "epoch": 4.255952380952381,
+      "grad_norm": 3.9683926105499268,
+      "learning_rate": 2.9761904761904763e-06,
+      "loss": 0.8749,
+      "step": 1430
+    },
+    {
+      "epoch": 4.285714285714286,
+      "grad_norm": 1.5732113122940063,
+      "learning_rate": 2.8571428571428573e-06,
+      "loss": 0.9421,
+      "step": 1440
+    },
+    {
+      "epoch": 4.315476190476191,
+      "grad_norm": 2.5848405361175537,
+      "learning_rate": 2.7380952380952387e-06,
+      "loss": 0.9617,
+      "step": 1450
+    },
+    {
+      "epoch": 4.345238095238095,
+      "grad_norm": 3.7017910480499268,
+      "learning_rate": 2.6190476190476192e-06,
+      "loss": 0.905,
+      "step": 1460
+    },
+    {
+      "epoch": 4.375,
+      "grad_norm": 5.973739147186279,
+      "learning_rate": 2.5e-06,
+      "loss": 0.89,
+      "step": 1470
+    },
+    {
+      "epoch": 4.404761904761905,
+      "grad_norm": 1.8716737031936646,
+      "learning_rate": 2.380952380952381e-06,
+      "loss": 0.9635,
+      "step": 1480
+    },
+    {
+      "epoch": 4.434523809523809,
+      "grad_norm": 3.3029792308807373,
+      "learning_rate": 2.261904761904762e-06,
+      "loss": 0.933,
+      "step": 1490
+    },
+    {
+      "epoch": 4.464285714285714,
+      "grad_norm": 2.5819740295410156,
+      "learning_rate": 2.1428571428571427e-06,
+      "loss": 0.8899,
+      "step": 1500
+    },
+    {
+      "epoch": 4.494047619047619,
+      "grad_norm": 3.5635058879852295,
+      "learning_rate": 2.023809523809524e-06,
+      "loss": 0.8539,
+      "step": 1510
+    },
+    {
+      "epoch": 4.523809523809524,
+      "grad_norm": 2.5672874450683594,
+      "learning_rate": 1.904761904761905e-06,
+      "loss": 1.0972,
+      "step": 1520
+    },
+    {
+      "epoch": 4.553571428571429,
+      "grad_norm": 5.11098051071167,
+      "learning_rate": 1.7857142857142859e-06,
+      "loss": 0.9862,
+      "step": 1530
+    },
+    {
+      "epoch": 4.583333333333333,
+      "grad_norm": 2.5244972705841064,
+      "learning_rate": 1.6666666666666667e-06,
+      "loss": 1.0213,
+      "step": 1540
+    },
+    {
+      "epoch": 4.613095238095238,
+      "grad_norm": 3.5044398307800293,
+      "learning_rate": 1.5476190476190479e-06,
+      "loss": 0.9144,
+      "step": 1550
+    },
+    {
+      "epoch": 4.642857142857143,
+      "grad_norm": 2.4903435707092285,
+      "learning_rate": 1.4285714285714286e-06,
+      "loss": 0.9331,
+      "step": 1560
+    },
+    {
+      "epoch": 4.6726190476190474,
+      "grad_norm": 3.208696126937866,
+      "learning_rate": 1.3095238095238096e-06,
+      "loss": 1.013,
+      "step": 1570
+    },
+    {
+      "epoch": 4.7023809523809526,
+      "grad_norm": 2.255563735961914,
+      "learning_rate": 1.1904761904761906e-06,
+      "loss": 0.7625,
+      "step": 1580
+    },
+    {
+      "epoch": 4.732142857142857,
+      "grad_norm": 2.1157748699188232,
+      "learning_rate": 1.0714285714285714e-06,
+      "loss": 0.8885,
+      "step": 1590
+    },
+    {
+      "epoch": 4.761904761904762,
+      "grad_norm": 3.0076255798339844,
+      "learning_rate": 9.523809523809525e-07,
+      "loss": 1.0166,
+      "step": 1600
+    },
+    {
+      "epoch": 4.791666666666667,
+      "grad_norm": 2.899481773376465,
+      "learning_rate": 8.333333333333333e-07,
+      "loss": 0.9983,
+      "step": 1610
+    },
+    {
+      "epoch": 4.821428571428571,
+      "grad_norm": 6.084941387176514,
+      "learning_rate": 7.142857142857143e-07,
+      "loss": 1.1526,
+      "step": 1620
+    },
+    {
+      "epoch": 4.851190476190476,
+      "grad_norm": 3.8710179328918457,
+      "learning_rate": 5.952380952380953e-07,
+      "loss": 0.8589,
+      "step": 1630
+    },
+    {
+      "epoch": 4.880952380952381,
+      "grad_norm": 2.1053106784820557,
+      "learning_rate": 4.7619047619047623e-07,
+      "loss": 0.8788,
+      "step": 1640
+    },
+    {
+      "epoch": 4.910714285714286,
+      "grad_norm": 2.2121217250823975,
+      "learning_rate": 3.5714285714285716e-07,
+      "loss": 0.8718,
+      "step": 1650
+    },
+    {
+      "epoch": 4.940476190476191,
+      "grad_norm": 2.3137481212615967,
+      "learning_rate": 2.3809523809523811e-07,
+      "loss": 0.7878,
+      "step": 1660
+    },
+    {
+      "epoch": 4.970238095238095,
+      "grad_norm": 2.676529884338379,
+      "learning_rate": 1.1904761904761906e-07,
+      "loss": 0.7782,
+      "step": 1670
+    },
+    {
+      "epoch": 5.0,
+      "grad_norm": 7.775545597076416,
+      "learning_rate": 0.0,
+      "loss": 0.7847,
+      "step": 1680
+    },
+    {
+      "epoch": 5.0,
+      "eval_accuracy": 0.9514767932489452,
+      "eval_loss": 0.8745647668838501,
+      "eval_runtime": 50.8678,
+      "eval_samples_per_second": 9.318,
+      "eval_steps_per_second": 1.18,
+      "step": 1680
+    },
+    {
+      "epoch": 5.0,
+      "step": 1680,
+      "total_flos": 1.0410532148820787e+18,
+      "train_loss": 1.5688391100792658,
+      "train_runtime": 1801.1044,
+      "train_samples_per_second": 7.457,
+      "train_steps_per_second": 0.933
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 1680,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 5,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 1.0410532148820787e+18,
+  "train_batch_size": 8,
+  "trial_name": null,
+  "trial_params": null
+}
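The `log_history` list above carries the whole training curve. A small stdlib-only sketch for pulling the per-epoch evaluation metrics back out of the file:

```python
# Sketch: print epoch-by-epoch validation loss/accuracy from trainer_state.json.
import json

with open("trainer_state.json") as f:
    state = json.load(f)

for entry in state["log_history"]:
    if "eval_accuracy" in entry:  # the five per-epoch evaluation records
        print(f"epoch {entry['epoch']:.0f}: "
              f"eval_loss={entry['eval_loss']:.4f}, "
              f"eval_accuracy={entry['eval_accuracy']:.4f}")
```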
training_args.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31662c38d27298aa5f2cd538b8973a3eb6908906cc3a005f04bcb4fc904d82b6
+size 5368