Pclanglais committed on
Commit 52b4308 · verified · 1 Parent(s): a8568ac

Upload folder using huggingface_hub

.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+tokenizer.json filter=lfs diff=lfs merge=lfs -text
added_tokens.json ADDED
@@ -0,0 +1,3 @@
+{
+  "[MASK]": 250101
+}
config.json ADDED
@@ -0,0 +1,284 @@
+{
+  "_name_or_path": "deberta-large",
+  "architectures": [
+    "DebertaV2ForSequenceClassification"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 768,
+  "id2label": {
+    "0": "harvard-cite-them-right",
+    "1": "thieme-german",
+    "2": "american-political-science-association",
+    "3": "american-society-for-horticultural-science",
+    "4": "hiob-ludolf-centre-for-ethiopian-studies",
+    "5": "mary-ann-liebert-vancouver",
+    "6": "the-geological-society-of-america",
+    "7": "modern-language-association",
+    "8": "african-online-scientific-information-systems-harvard",
+    "9": "american-medical-association",
+    "10": "annual-reviews",
+    "11": "oikos",
+    "12": "springer-socpsych",
+    "13": "vancouver",
+    "14": "royal-society-of-chemistry",
+    "15": "elsevier-harvard",
+    "16": "institute-of-mathematics-and-its-applications",
+    "17": "future-science-group",
+    "18": "ieee",
+    "19": "institute-of-mathematical-statistics",
+    "20": "the-rockefeller-university-press",
+    "21": "american-society-of-agricultural-and-biological-engineers",
+    "22": "politeknik-negeri-manado-jurnal-p3m",
+    "23": "chroniques-des-activites-archeologiques-de-l-ecole-francaise-de-rome",
+    "24": "modern-humanities-research-association",
+    "25": "medicina-clinica",
+    "26": "environment-and-planning",
+    "27": "style-manual-for-authors-editors-and-printers-snooks-co",
+    "28": "american-nuclear-society",
+    "29": "aims-press",
+    "30": "springer-vancouver",
+    "31": "proceedings-of-the-royal-society-b",
+    "32": "taylor-and-francis-chicago",
+    "33": "the-journal-of-comparative-neurology",
+    "34": "nature",
+    "35": "american-chemical-society",
+    "36": "die-bachelorarbeit-samac-et-al-in-text",
+    "37": "future-medicine",
+    "38": "international-union-of-crystallography",
+    "39": "copernicus-publications",
+    "40": "medicine-publishing",
+    "41": "american-society-for-microbiology",
+    "42": "springer-humanities",
+    "43": "springer-physics",
+    "44": "style-manual-australian-government-note",
+    "45": "institute-of-physics-harvard",
+    "46": "plos",
+    "47": "american-sociological-association",
+    "48": "taylor-and-francis-national-library-of-medicine",
+    "49": "canadian-journal-of-fisheries-and-aquatic-sciences",
+    "50": "elsevier",
+    "51": "american-society-of-civil-engineers",
+    "52": "inter-research-science-center",
+    "53": "the-lancet",
+    "54": "chicago",
+    "55": "elsevier-vancouver",
+    "56": "landes-bioscience-journals",
+    "57": "institute-for-operations-research-and-the-management-sciences",
+    "58": "american-institute-of-aeronautics-and-astronautics",
+    "59": "baishideng-publishing-group",
+    "60": "the-optical-society",
+    "61": "american-society-of-mechanical-engineers",
+    "62": "association-for-computing-machinery",
+    "63": "bristol-university-press",
+    "64": "cold-spring-harbor-laboratory-press",
+    "65": "spie-journals",
+    "66": "national-institute-of-health-research",
+    "67": "bmj",
+    "68": "mary-ann-liebert-harvard",
+    "69": "international-journal-of-wildland-fire",
+    "70": "institute-of-physics",
+    "71": "american-institute-of-physics",
+    "72": "american-statistical-association",
+    "73": "frontiers-medical-journals",
+    "74": "american-physiological-society",
+    "75": "the-institution-of-engineering-and-technology",
+    "76": "entomological-society-of-america",
+    "77": "african-online-scientific-information-systems-vancouver",
+    "78": "trends-journals",
+    "79": "springer-mathphys",
+    "80": "ecology",
+    "81": "the-company-of-biologists",
+    "82": "springer-basic",
+    "83": "american-society-for-pharmacology-and-experimental-therapeutics",
+    "84": "american-association-for-cancer-research",
+    "85": "american-meteorological-society",
+    "86": "the-geological-society-of-london",
+    "87": "karger-journals",
+    "88": "springer-fachzeitschriften-medizin-psychologie",
+    "89": "canadian-journal-of-soil-science",
+    "90": "begell-house-chicago",
+    "91": "spandidos-publications",
+    "92": "biomed-central",
+    "93": "cell",
+    "94": "council-of-science-editors",
+    "95": "frontiers",
+    "96": "embo-press",
+    "97": "emu-austral-ornithology",
+    "98": "microbiology-society",
+    "99": "pontifical-gregorian-university",
+    "100": "current-opinion",
+    "101": "sage-harvard",
+    "102": "the-institute-of-electronics-information-and-communication-engineers",
+    "103": "taylor-and-francis-council-of-science-editors",
+    "104": "european-journal-of-human-genetics",
+    "105": "american-geophysical-union",
+    "106": "integrated-science-publishing-journals",
+    "107": "universita-pontificia-salesiana",
+    "108": "american-fisheries-society",
+    "109": "international-studies-association",
+    "110": "american-physics-society",
+    "111": "european-society-of-cardiology",
+    "112": "oxford-university-press-scimed",
+    "113": "pensoft-journals",
+    "114": "multidisciplinary-digital-publishing-institute",
+    "115": "endocrine-press",
+    "116": "sage-vancouver",
+    "117": "academy-of-management-review",
+    "118": "american-marketing-association",
+    "119": "the-astrophysical-journal",
+    "120": "hainan-medical-university-journal-publisher",
+    "121": "museum-national-dhistoire-naturelle"
+  },
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "label2id": {
+    "academy-of-management-review": 117,
+    "african-online-scientific-information-systems-harvard": 8,
+    "african-online-scientific-information-systems-vancouver": 77,
+    "aims-press": 29,
+    "american-association-for-cancer-research": 84,
+    "american-chemical-society": 35,
+    "american-fisheries-society": 108,
+    "american-geophysical-union": 105,
+    "american-institute-of-aeronautics-and-astronautics": 58,
+    "american-institute-of-physics": 71,
+    "american-marketing-association": 118,
+    "american-medical-association": 9,
+    "american-meteorological-society": 85,
+    "american-nuclear-society": 28,
+    "american-physics-society": 110,
+    "american-physiological-society": 74,
+    "american-political-science-association": 2,
+    "american-society-for-horticultural-science": 3,
+    "american-society-for-microbiology": 41,
+    "american-society-for-pharmacology-and-experimental-therapeutics": 83,
+    "american-society-of-agricultural-and-biological-engineers": 21,
+    "american-society-of-civil-engineers": 51,
+    "american-society-of-mechanical-engineers": 61,
+    "american-sociological-association": 47,
+    "american-statistical-association": 72,
+    "annual-reviews": 10,
+    "association-for-computing-machinery": 62,
+    "baishideng-publishing-group": 59,
+    "begell-house-chicago": 90,
+    "biomed-central": 92,
+    "bmj": 67,
+    "bristol-university-press": 63,
+    "canadian-journal-of-fisheries-and-aquatic-sciences": 49,
+    "canadian-journal-of-soil-science": 89,
+    "cell": 93,
+    "chicago": 54,
+    "chroniques-des-activites-archeologiques-de-l-ecole-francaise-de-rome": 23,
+    "cold-spring-harbor-laboratory-press": 64,
+    "copernicus-publications": 39,
+    "council-of-science-editors": 94,
+    "current-opinion": 100,
+    "die-bachelorarbeit-samac-et-al-in-text": 36,
+    "ecology": 80,
+    "elsevier": 50,
+    "elsevier-harvard": 15,
+    "elsevier-vancouver": 55,
+    "embo-press": 96,
+    "emu-austral-ornithology": 97,
+    "endocrine-press": 115,
+    "entomological-society-of-america": 76,
+    "environment-and-planning": 26,
+    "european-journal-of-human-genetics": 104,
+    "european-society-of-cardiology": 111,
+    "frontiers": 95,
+    "frontiers-medical-journals": 73,
+    "future-medicine": 37,
+    "future-science-group": 17,
+    "hainan-medical-university-journal-publisher": 120,
+    "harvard-cite-them-right": 0,
+    "hiob-ludolf-centre-for-ethiopian-studies": 4,
+    "ieee": 18,
+    "institute-for-operations-research-and-the-management-sciences": 57,
+    "institute-of-mathematical-statistics": 19,
+    "institute-of-mathematics-and-its-applications": 16,
+    "institute-of-physics": 70,
+    "institute-of-physics-harvard": 45,
+    "integrated-science-publishing-journals": 106,
+    "inter-research-science-center": 52,
+    "international-journal-of-wildland-fire": 69,
+    "international-studies-association": 109,
+    "international-union-of-crystallography": 38,
+    "karger-journals": 87,
+    "landes-bioscience-journals": 56,
+    "mary-ann-liebert-harvard": 68,
+    "mary-ann-liebert-vancouver": 5,
+    "medicina-clinica": 25,
+    "medicine-publishing": 40,
+    "microbiology-society": 98,
+    "modern-humanities-research-association": 24,
+    "modern-language-association": 7,
+    "multidisciplinary-digital-publishing-institute": 114,
+    "museum-national-dhistoire-naturelle": 121,
+    "national-institute-of-health-research": 66,
+    "nature": 34,
+    "oikos": 11,
+    "oxford-university-press-scimed": 112,
+    "pensoft-journals": 113,
+    "plos": 46,
+    "politeknik-negeri-manado-jurnal-p3m": 22,
+    "pontifical-gregorian-university": 99,
+    "proceedings-of-the-royal-society-b": 31,
+    "royal-society-of-chemistry": 14,
+    "sage-harvard": 101,
+    "sage-vancouver": 116,
+    "spandidos-publications": 91,
+    "spie-journals": 65,
+    "springer-basic": 82,
+    "springer-fachzeitschriften-medizin-psychologie": 88,
+    "springer-humanities": 42,
+    "springer-mathphys": 79,
+    "springer-physics": 43,
+    "springer-socpsych": 12,
+    "springer-vancouver": 30,
+    "style-manual-australian-government-note": 44,
+    "style-manual-for-authors-editors-and-printers-snooks-co": 27,
+    "taylor-and-francis-chicago": 32,
+    "taylor-and-francis-council-of-science-editors": 103,
+    "taylor-and-francis-national-library-of-medicine": 48,
+    "the-astrophysical-journal": 119,
+    "the-company-of-biologists": 81,
+    "the-geological-society-of-america": 6,
+    "the-geological-society-of-london": 86,
+    "the-institute-of-electronics-information-and-communication-engineers": 102,
+    "the-institution-of-engineering-and-technology": 75,
+    "the-journal-of-comparative-neurology": 33,
+    "the-lancet": 53,
+    "the-optical-society": 60,
+    "the-rockefeller-university-press": 20,
+    "thieme-german": 1,
+    "trends-journals": 78,
+    "universita-pontificia-salesiana": 107,
+    "vancouver": 13
+  },
+  "layer_norm_eps": 1e-07,
+  "max_position_embeddings": 512,
+  "max_relative_positions": -1,
+  "model_type": "deberta-v2",
+  "norm_rel_ebd": "layer_norm",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "pad_token_id": 0,
+  "pooler_dropout": 0,
+  "pooler_hidden_act": "gelu",
+  "pooler_hidden_size": 768,
+  "pos_att_type": [
+    "p2c",
+    "c2p"
+  ],
+  "position_biased_input": false,
+  "position_buckets": 256,
+  "problem_type": "multi_label_classification",
+  "relative_attention": true,
+  "share_att_key": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.41.2",
+  "type_vocab_size": 0,
+  "vocab_size": 251000
+}
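
The config above defines a 122-way citation-style classifier head on a deberta-v2 backbone with `problem_type` set to `multi_label_classification`. A minimal inference sketch, assuming the files from this commit sit in a local directory (the Hub repo id is not stated here) and using an assumed 0.5 sigmoid threshold:

```python
# Sketch only: the directory path, example reference, and 0.5 threshold are assumptions.
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification

model_dir = "./deberta-bib-style-classification"  # assumed local path to the uploaded files

tokenizer = AutoTokenizer.from_pretrained(model_dir)
model = AutoModelForSequenceClassification.from_pretrained(model_dir)
model.eval()

# An illustrative bibliographic reference whose citation style we want to identify.
reference = "1. Smith J, Doe A. Example title. J Examples. 2020;12(3):45-67."
inputs = tokenizer(reference, truncation=True, max_length=512, return_tensors="pt")

with torch.no_grad():
    logits = model(**inputs).logits[0]

# problem_type is multi_label_classification, so each label gets an independent sigmoid score.
scores = torch.sigmoid(logits)
predicted = [model.config.id2label[i] for i, s in enumerate(scores.tolist()) if s > 0.5]
print(predicted)
```
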
model.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:24e31bd7c3da94f801b31796e2257f5cce1531e82c1839ac7df424e2542c817d
+size 1115637336
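
The three lines above are a Git LFS pointer, not the weights themselves: the actual model.safetensors is fetched by the LFS smudge filter and must hash to the stated oid and byte size. A small sketch for checking a downloaded copy against the pointer (the local file path is an assumption):

```python
# Sketch: verify a downloaded file against the LFS pointer's sha256 oid and size fields.
import hashlib

def matches_lfs_pointer(path, expected_oid, expected_size):
    digest, size = hashlib.sha256(), 0
    with open(path, "rb") as fh:
        for chunk in iter(lambda: fh.read(1 << 20), b""):
            digest.update(chunk)
            size += len(chunk)
    return digest.hexdigest() == expected_oid and size == expected_size

print(matches_lfs_pointer(
    "model.safetensors",  # assumed local path
    "24e31bd7c3da94f801b31796e2257f5cce1531e82c1839ac7df424e2542c817d",
    1115637336,
))
```
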
optimizer.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f89f8dec433b084b8008bb8a92f4b7594c2339d70aa8a8b51d50480323e010e8
+size 2231394170
rng_state.pth ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cd008cc57f6432b08bf036944ad8c15663aff9f3a0b3c83b8ac4d9837511a2b0
+size 14244
scheduler.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9097c42c7958a57b9b27c5387f231b01d681e88ccc4a9a9d89cc46aeb70fe08f
+size 1064
special_tokens_map.json ADDED
@@ -0,0 +1,15 @@
+{
+  "bos_token": "[CLS]",
+  "cls_token": "[CLS]",
+  "eos_token": "[SEP]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": {
+    "content": "[UNK]",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  }
+}
spm.model ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:13c8d666d62a7bc4ac8f040aab68e942c861f93303156cc28f5c7e885d86d6e3
+size 4305025
tokenizer.json ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7dbb7b63c76007984d0e58a90ee901ceb5b16c8e78252d36ddcde748b3474a1a
+size 16331639
tokenizer_config.json ADDED
@@ -0,0 +1,58 @@
+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "3": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "250101": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "bos_token": "[CLS]",
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_lower_case": false,
+  "eos_token": "[SEP]",
+  "mask_token": "[MASK]",
+  "model_max_length": 1000000000000000019884624838656,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "sp_model_kwargs": {},
+  "split_by_punct": false,
+  "tokenizer_class": "DebertaV2Tokenizer",
+  "unk_token": "[UNK]",
+  "vocab_type": "spm"
+}
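
Together with spm.model, tokenizer.json, added_tokens.json and special_tokens_map.json above, this config loads as a SentencePiece-based DebertaV2 tokenizer with [MASK] appended at id 250101. A quick check, assuming the same local directory as before:

```python
# Sketch: load the tokenizer files from this commit and confirm the special tokens.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("./deberta-bib-style-classification")  # assumed path
print(type(tok).__name__)                   # a DebertaV2 tokenizer class (fast or slow)
print(tok.cls_token, tok.sep_token, tok.pad_token, tok.unk_token, tok.mask_token)
print(tok.convert_tokens_to_ids("[MASK]"))  # 250101, matching added_tokens.json
```
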
trainer_state.json ADDED
@@ -0,0 +1,953 @@
+{
+  "best_metric": 0.9361970057366729,
+  "best_model_checkpoint": "deberta-bib-style-classification/checkpoint-61266",
+  "epoch": 6.0,
+  "eval_steps": 500,
+  "global_step": 61266,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+ {
12
+ "epoch": 0.048966800509254726,
13
+ "grad_norm": 0.18245625495910645,
14
+ "learning_rate": 1.9836777331635818e-05,
15
+ "loss": 0.1329,
16
+ "step": 500
17
+ },
18
+ {
19
+ "epoch": 0.09793360101850945,
20
+ "grad_norm": 0.17618948221206665,
21
+ "learning_rate": 1.9673554663271638e-05,
22
+ "loss": 0.0446,
23
+ "step": 1000
24
+ },
25
+ {
26
+ "epoch": 0.14690040152776418,
27
+ "grad_norm": 0.16028529405593872,
28
+ "learning_rate": 1.9510331994907454e-05,
29
+ "loss": 0.0436,
30
+ "step": 1500
31
+ },
32
+ {
33
+ "epoch": 0.1958672020370189,
34
+ "grad_norm": 0.13800546526908875,
35
+ "learning_rate": 1.934710932654327e-05,
36
+ "loss": 0.0382,
37
+ "step": 2000
38
+ },
39
+ {
40
+ "epoch": 0.24483400254627363,
41
+ "grad_norm": 0.1666691154241562,
42
+ "learning_rate": 1.918388665817909e-05,
43
+ "loss": 0.0317,
44
+ "step": 2500
45
+ },
46
+ {
47
+ "epoch": 0.29380080305552836,
48
+ "grad_norm": 0.2547225058078766,
49
+ "learning_rate": 1.9020663989814907e-05,
50
+ "loss": 0.0276,
51
+ "step": 3000
52
+ },
53
+ {
54
+ "epoch": 0.3427676035647831,
55
+ "grad_norm": 0.10508100688457489,
56
+ "learning_rate": 1.8857441321450724e-05,
57
+ "loss": 0.0236,
58
+ "step": 3500
59
+ },
60
+ {
61
+ "epoch": 0.3917344040740378,
62
+ "grad_norm": 0.1274070292711258,
63
+ "learning_rate": 1.8694218653086543e-05,
64
+ "loss": 0.0208,
65
+ "step": 4000
66
+ },
67
+ {
68
+ "epoch": 0.44070120458329254,
69
+ "grad_norm": 0.1652757227420807,
70
+ "learning_rate": 1.853099598472236e-05,
71
+ "loss": 0.0187,
72
+ "step": 4500
73
+ },
74
+ {
75
+ "epoch": 0.48966800509254726,
76
+ "grad_norm": 0.15535283088684082,
77
+ "learning_rate": 1.8367773316358176e-05,
78
+ "loss": 0.0164,
79
+ "step": 5000
80
+ },
81
+ {
82
+ "epoch": 0.538634805601802,
83
+ "grad_norm": 0.1361575722694397,
84
+ "learning_rate": 1.8204550647993996e-05,
85
+ "loss": 0.0151,
86
+ "step": 5500
87
+ },
88
+ {
89
+ "epoch": 0.5876016061110567,
90
+ "grad_norm": 0.14795701205730438,
91
+ "learning_rate": 1.8041327979629813e-05,
92
+ "loss": 0.0141,
93
+ "step": 6000
94
+ },
95
+ {
96
+ "epoch": 0.6365684066203114,
97
+ "grad_norm": 0.24855226278305054,
98
+ "learning_rate": 1.787810531126563e-05,
99
+ "loss": 0.0128,
100
+ "step": 6500
101
+ },
102
+ {
103
+ "epoch": 0.6855352071295662,
104
+ "grad_norm": 0.11990305036306381,
105
+ "learning_rate": 1.771488264290145e-05,
106
+ "loss": 0.0119,
107
+ "step": 7000
108
+ },
109
+ {
110
+ "epoch": 0.7345020076388209,
111
+ "grad_norm": 0.12674401700496674,
112
+ "learning_rate": 1.7551659974537265e-05,
113
+ "loss": 0.0109,
114
+ "step": 7500
115
+ },
116
+ {
117
+ "epoch": 0.7834688081480756,
118
+ "grad_norm": 0.05565750598907471,
119
+ "learning_rate": 1.7388437306173082e-05,
120
+ "loss": 0.0103,
121
+ "step": 8000
122
+ },
123
+ {
124
+ "epoch": 0.8324356086573304,
125
+ "grad_norm": 0.1362127959728241,
126
+ "learning_rate": 1.72252146378089e-05,
127
+ "loss": 0.0097,
128
+ "step": 8500
129
+ },
130
+ {
131
+ "epoch": 0.8814024091665851,
132
+ "grad_norm": 0.1502208560705185,
133
+ "learning_rate": 1.7062318414781447e-05,
134
+ "loss": 0.0096,
135
+ "step": 9000
136
+ },
137
+ {
138
+ "epoch": 0.9303692096758398,
139
+ "grad_norm": 0.1307108849287033,
140
+ "learning_rate": 1.689942219175399e-05,
141
+ "loss": 0.009,
142
+ "step": 9500
143
+ },
144
+ {
145
+ "epoch": 0.9793360101850945,
146
+ "grad_norm": 0.08258962631225586,
147
+ "learning_rate": 1.6736199523389808e-05,
148
+ "loss": 0.0088,
149
+ "step": 10000
150
+ },
151
+ {
152
+ "epoch": 1.0,
153
+ "eval_accuracy": 0.7270709032884696,
154
+ "eval_f1": 0.8196123147092359,
155
+ "eval_loss": 0.007612535264343023,
156
+ "eval_roc_auc": 0.8736920143065684,
157
+ "eval_runtime": 56.0999,
158
+ "eval_samples_per_second": 128.467,
159
+ "eval_steps_per_second": 16.061,
160
+ "step": 10211
161
+ },
162
+ {
163
+ "epoch": 1.0283028106943493,
164
+ "grad_norm": 0.15880635380744934,
165
+ "learning_rate": 1.6572976855025628e-05,
166
+ "loss": 0.0086,
167
+ "step": 10500
168
+ },
169
+ {
170
+ "epoch": 1.077269611203604,
171
+ "grad_norm": 0.04505603387951851,
172
+ "learning_rate": 1.6409754186661444e-05,
173
+ "loss": 0.0082,
174
+ "step": 11000
175
+ },
176
+ {
177
+ "epoch": 1.1262364117128587,
178
+ "grad_norm": 0.1436477154493332,
179
+ "learning_rate": 1.624653151829726e-05,
180
+ "loss": 0.0076,
181
+ "step": 11500
182
+ },
183
+ {
184
+ "epoch": 1.1752032122221134,
185
+ "grad_norm": 0.10293476283550262,
186
+ "learning_rate": 1.608330884993308e-05,
187
+ "loss": 0.0074,
188
+ "step": 12000
189
+ },
190
+ {
191
+ "epoch": 1.2241700127313682,
192
+ "grad_norm": 0.12684179842472076,
193
+ "learning_rate": 1.5920086181568897e-05,
194
+ "loss": 0.0076,
195
+ "step": 12500
196
+ },
197
+ {
198
+ "epoch": 1.2731368132406229,
199
+ "grad_norm": 0.09429904818534851,
200
+ "learning_rate": 1.5756863513204713e-05,
201
+ "loss": 0.0073,
202
+ "step": 13000
203
+ },
204
+ {
205
+ "epoch": 1.3221036137498776,
206
+ "grad_norm": 0.24173329770565033,
207
+ "learning_rate": 1.5593640844840533e-05,
208
+ "loss": 0.0068,
209
+ "step": 13500
210
+ },
211
+ {
212
+ "epoch": 1.3710704142591323,
213
+ "grad_norm": 0.15347008407115936,
214
+ "learning_rate": 1.543041817647635e-05,
215
+ "loss": 0.0069,
216
+ "step": 14000
217
+ },
218
+ {
219
+ "epoch": 1.420037214768387,
220
+ "grad_norm": 0.30062130093574524,
221
+ "learning_rate": 1.5267195508112166e-05,
222
+ "loss": 0.0064,
223
+ "step": 14500
224
+ },
225
+ {
226
+ "epoch": 1.4690040152776418,
227
+ "grad_norm": 0.0312146358191967,
228
+ "learning_rate": 1.5103972839747984e-05,
229
+ "loss": 0.0065,
230
+ "step": 15000
231
+ },
232
+ {
233
+ "epoch": 1.5179708157868965,
234
+ "grad_norm": 0.19778664410114288,
235
+ "learning_rate": 1.4941076616720531e-05,
236
+ "loss": 0.0063,
237
+ "step": 15500
238
+ },
239
+ {
240
+ "epoch": 1.5669376162961512,
241
+ "grad_norm": 0.07010342180728912,
242
+ "learning_rate": 1.4778180393693078e-05,
243
+ "loss": 0.0058,
244
+ "step": 16000
245
+ },
246
+ {
247
+ "epoch": 1.615904416805406,
248
+ "grad_norm": 0.11643481999635696,
249
+ "learning_rate": 1.4614957725328896e-05,
250
+ "loss": 0.0058,
251
+ "step": 16500
252
+ },
253
+ {
254
+ "epoch": 1.6648712173146607,
255
+ "grad_norm": 0.14050887525081635,
256
+ "learning_rate": 1.4452061502301439e-05,
257
+ "loss": 0.0059,
258
+ "step": 17000
259
+ },
260
+ {
261
+ "epoch": 1.7138380178239154,
262
+ "grad_norm": 0.09967122972011566,
263
+ "learning_rate": 1.4288838833937257e-05,
264
+ "loss": 0.0053,
265
+ "step": 17500
266
+ },
267
+ {
268
+ "epoch": 1.7628048183331702,
269
+ "grad_norm": 0.06283292174339294,
270
+ "learning_rate": 1.4125616165573075e-05,
271
+ "loss": 0.0057,
272
+ "step": 18000
273
+ },
274
+ {
275
+ "epoch": 1.8117716188424249,
276
+ "grad_norm": 0.04395497962832451,
277
+ "learning_rate": 1.3962393497208892e-05,
278
+ "loss": 0.0051,
279
+ "step": 18500
280
+ },
281
+ {
282
+ "epoch": 1.8607384193516796,
283
+ "grad_norm": 0.15392401814460754,
284
+ "learning_rate": 1.379917082884471e-05,
285
+ "loss": 0.0049,
286
+ "step": 19000
287
+ },
288
+ {
289
+ "epoch": 1.9097052198609343,
290
+ "grad_norm": 0.053341180086135864,
291
+ "learning_rate": 1.3635948160480528e-05,
292
+ "loss": 0.0052,
293
+ "step": 19500
294
+ },
295
+ {
296
+ "epoch": 1.958672020370189,
297
+ "grad_norm": 0.14263851940631866,
298
+ "learning_rate": 1.3472725492116345e-05,
299
+ "loss": 0.005,
300
+ "step": 20000
301
+ },
302
+ {
303
+ "epoch": 2.0,
304
+ "eval_accuracy": 0.8507007076453448,
305
+ "eval_f1": 0.8938003106905804,
306
+ "eval_loss": 0.00466541014611721,
307
+ "eval_roc_auc": 0.9387280731428467,
308
+ "eval_runtime": 55.7959,
309
+ "eval_samples_per_second": 129.167,
310
+ "eval_steps_per_second": 16.148,
311
+ "step": 20422
312
+ },
313
+ {
314
+ "epoch": 2.0076388208794436,
315
+ "grad_norm": 0.16177518665790558,
316
+ "learning_rate": 1.3309502823752163e-05,
317
+ "loss": 0.0048,
318
+ "step": 20500
319
+ },
320
+ {
321
+ "epoch": 2.0566056213886985,
322
+ "grad_norm": 0.13520753383636475,
323
+ "learning_rate": 1.314628015538798e-05,
324
+ "loss": 0.0045,
325
+ "step": 21000
326
+ },
327
+ {
328
+ "epoch": 2.105572421897953,
329
+ "grad_norm": 0.09872843325138092,
330
+ "learning_rate": 1.2983057487023797e-05,
331
+ "loss": 0.0043,
332
+ "step": 21500
333
+ },
334
+ {
335
+ "epoch": 2.154539222407208,
336
+ "grad_norm": 0.07923103123903275,
337
+ "learning_rate": 1.2819834818659615e-05,
338
+ "loss": 0.0042,
339
+ "step": 22000
340
+ },
341
+ {
342
+ "epoch": 2.2035060229164625,
343
+ "grad_norm": 0.01526894886046648,
344
+ "learning_rate": 1.2656612150295434e-05,
345
+ "loss": 0.0041,
346
+ "step": 22500
347
+ },
348
+ {
349
+ "epoch": 2.2524728234257174,
350
+ "grad_norm": 0.09783605486154556,
351
+ "learning_rate": 1.249338948193125e-05,
352
+ "loss": 0.0042,
353
+ "step": 23000
354
+ },
355
+ {
356
+ "epoch": 2.301439623934972,
357
+ "grad_norm": 0.12495558708906174,
358
+ "learning_rate": 1.2330819704240527e-05,
359
+ "loss": 0.0042,
360
+ "step": 23500
361
+ },
362
+ {
363
+ "epoch": 2.350406424444227,
364
+ "grad_norm": 0.09078874439001083,
365
+ "learning_rate": 1.2167597035876343e-05,
366
+ "loss": 0.0045,
367
+ "step": 24000
368
+ },
369
+ {
370
+ "epoch": 2.3993732249534814,
371
+ "grad_norm": 0.0656205415725708,
372
+ "learning_rate": 1.2004374367512162e-05,
373
+ "loss": 0.0039,
374
+ "step": 24500
375
+ },
376
+ {
377
+ "epoch": 2.4483400254627363,
378
+ "grad_norm": 0.22209672629833221,
379
+ "learning_rate": 1.184115169914798e-05,
380
+ "loss": 0.0043,
381
+ "step": 25000
382
+ },
383
+ {
384
+ "epoch": 2.497306825971991,
385
+ "grad_norm": 0.4231460988521576,
386
+ "learning_rate": 1.1677929030783796e-05,
387
+ "loss": 0.0042,
388
+ "step": 25500
389
+ },
390
+ {
391
+ "epoch": 2.5462736264812458,
392
+ "grad_norm": 0.0051184347830712795,
393
+ "learning_rate": 1.1514706362419614e-05,
394
+ "loss": 0.0039,
395
+ "step": 26000
396
+ },
397
+ {
398
+ "epoch": 2.5952404269905003,
399
+ "grad_norm": 0.15397199988365173,
400
+ "learning_rate": 1.1351483694055432e-05,
401
+ "loss": 0.0037,
402
+ "step": 26500
403
+ },
404
+ {
405
+ "epoch": 2.6442072274997552,
406
+ "grad_norm": 0.03935805708169937,
407
+ "learning_rate": 1.1188261025691249e-05,
408
+ "loss": 0.0035,
409
+ "step": 27000
410
+ },
411
+ {
412
+ "epoch": 2.6931740280090097,
413
+ "grad_norm": 0.011553222313523293,
414
+ "learning_rate": 1.1025364802663794e-05,
415
+ "loss": 0.0038,
416
+ "step": 27500
417
+ },
418
+ {
419
+ "epoch": 2.7421408285182647,
420
+ "grad_norm": 0.06817249953746796,
421
+ "learning_rate": 1.0862142134299612e-05,
422
+ "loss": 0.0035,
423
+ "step": 28000
424
+ },
425
+ {
426
+ "epoch": 2.791107629027519,
427
+ "grad_norm": 0.003078105626627803,
428
+ "learning_rate": 1.0699245911272159e-05,
429
+ "loss": 0.0036,
430
+ "step": 28500
431
+ },
432
+ {
433
+ "epoch": 2.840074429536774,
434
+ "grad_norm": 0.11958350241184235,
435
+ "learning_rate": 1.0536023242907975e-05,
436
+ "loss": 0.0037,
437
+ "step": 29000
438
+ },
439
+ {
440
+ "epoch": 2.8890412300460286,
441
+ "grad_norm": 0.17206184566020966,
442
+ "learning_rate": 1.0372800574543793e-05,
443
+ "loss": 0.0036,
444
+ "step": 29500
445
+ },
446
+ {
447
+ "epoch": 2.9380080305552836,
448
+ "grad_norm": 0.018106259405612946,
449
+ "learning_rate": 1.0209577906179611e-05,
450
+ "loss": 0.0035,
451
+ "step": 30000
452
+ },
453
+ {
454
+ "epoch": 2.986974831064538,
455
+ "grad_norm": 0.14708341658115387,
456
+ "learning_rate": 1.0046355237815428e-05,
457
+ "loss": 0.0035,
458
+ "step": 30500
459
+ },
460
+ {
461
+ "epoch": 3.0,
462
+ "eval_accuracy": 0.8998196198140697,
463
+ "eval_f1": 0.9224077451943314,
464
+ "eval_loss": 0.0033930453937500715,
465
+ "eval_roc_auc": 0.9558131614465735,
466
+ "eval_runtime": 55.6722,
467
+ "eval_samples_per_second": 129.454,
468
+ "eval_steps_per_second": 16.184,
469
+ "step": 30633
470
+ },
471
+ {
472
+ "epoch": 3.035941631573793,
473
+ "grad_norm": 0.03020176850259304,
474
+ "learning_rate": 9.883132569451246e-06,
475
+ "loss": 0.0033,
476
+ "step": 31000
477
+ },
478
+ {
479
+ "epoch": 3.0849084320830475,
480
+ "grad_norm": 0.1869770586490631,
481
+ "learning_rate": 9.720236346423793e-06,
482
+ "loss": 0.0029,
483
+ "step": 31500
484
+ },
485
+ {
486
+ "epoch": 3.1338752325923025,
487
+ "grad_norm": 0.02884034253656864,
488
+ "learning_rate": 9.55701367805961e-06,
489
+ "loss": 0.0033,
490
+ "step": 32000
491
+ },
492
+ {
493
+ "epoch": 3.182842033101557,
494
+ "grad_norm": 0.003137261839583516,
495
+ "learning_rate": 9.393791009695427e-06,
496
+ "loss": 0.0031,
497
+ "step": 32500
498
+ },
499
+ {
500
+ "epoch": 3.231808833610812,
501
+ "grad_norm": 0.007661271840333939,
502
+ "learning_rate": 9.230568341331245e-06,
503
+ "loss": 0.0028,
504
+ "step": 33000
505
+ },
506
+ {
507
+ "epoch": 3.2807756341200665,
508
+ "grad_norm": 0.006506490521132946,
509
+ "learning_rate": 9.06767211830379e-06,
510
+ "loss": 0.0029,
511
+ "step": 33500
512
+ },
513
+ {
514
+ "epoch": 3.3297424346293214,
515
+ "grad_norm": 0.03698953613638878,
516
+ "learning_rate": 8.904449449939609e-06,
517
+ "loss": 0.003,
518
+ "step": 34000
519
+ },
520
+ {
521
+ "epoch": 3.378709235138576,
522
+ "grad_norm": 0.1853983998298645,
523
+ "learning_rate": 8.741226781575427e-06,
524
+ "loss": 0.0029,
525
+ "step": 34500
526
+ },
527
+ {
528
+ "epoch": 3.427676035647831,
529
+ "grad_norm": 0.053507931530475616,
530
+ "learning_rate": 8.578004113211243e-06,
531
+ "loss": 0.0029,
532
+ "step": 35000
533
+ },
534
+ {
535
+ "epoch": 3.4766428361570854,
536
+ "grad_norm": 0.10720884054899216,
537
+ "learning_rate": 8.414781444847061e-06,
538
+ "loss": 0.003,
539
+ "step": 35500
540
+ },
541
+ {
542
+ "epoch": 3.5256096366663403,
543
+ "grad_norm": 0.015493680723011494,
544
+ "learning_rate": 8.25155877648288e-06,
545
+ "loss": 0.0031,
546
+ "step": 36000
547
+ },
548
+ {
549
+ "epoch": 3.574576437175595,
550
+ "grad_norm": 0.07669718563556671,
551
+ "learning_rate": 8.088336108118696e-06,
552
+ "loss": 0.0029,
553
+ "step": 36500
554
+ },
555
+ {
556
+ "epoch": 3.6235432376848498,
557
+ "grad_norm": 0.16198168694972992,
558
+ "learning_rate": 7.925439885091243e-06,
559
+ "loss": 0.003,
560
+ "step": 37000
561
+ },
562
+ {
563
+ "epoch": 3.6725100381941043,
564
+ "grad_norm": 0.00896318256855011,
565
+ "learning_rate": 7.76254366206379e-06,
566
+ "loss": 0.0029,
567
+ "step": 37500
568
+ },
569
+ {
570
+ "epoch": 3.721476838703359,
571
+ "grad_norm": 0.12104916572570801,
572
+ "learning_rate": 7.599320993699606e-06,
573
+ "loss": 0.0028,
574
+ "step": 38000
575
+ },
576
+ {
577
+ "epoch": 3.7704436392126137,
578
+ "grad_norm": 0.10880939662456512,
579
+ "learning_rate": 7.436098325335424e-06,
580
+ "loss": 0.0029,
581
+ "step": 38500
582
+ },
583
+ {
584
+ "epoch": 3.8194104397218687,
585
+ "grad_norm": 0.030039768666028976,
586
+ "learning_rate": 7.272875656971241e-06,
587
+ "loss": 0.0027,
588
+ "step": 39000
589
+ },
590
+ {
591
+ "epoch": 3.868377240231123,
592
+ "grad_norm": 0.09756383299827576,
593
+ "learning_rate": 7.109652988607058e-06,
594
+ "loss": 0.0029,
595
+ "step": 39500
596
+ },
597
+ {
598
+ "epoch": 3.917344040740378,
599
+ "grad_norm": 0.002539890818297863,
600
+ "learning_rate": 6.9464303202428765e-06,
601
+ "loss": 0.0027,
602
+ "step": 40000
603
+ },
604
+ {
605
+ "epoch": 3.9663108412496326,
606
+ "grad_norm": 0.08850258588790894,
607
+ "learning_rate": 6.783207651878694e-06,
608
+ "loss": 0.0029,
609
+ "step": 40500
610
+ },
611
+ {
612
+ "epoch": 4.0,
613
+ "eval_accuracy": 0.9124462328291938,
614
+ "eval_f1": 0.9324835411122006,
615
+ "eval_loss": 0.0029195661190897226,
616
+ "eval_roc_auc": 0.9616052804493336,
617
+ "eval_runtime": 55.6822,
618
+ "eval_samples_per_second": 129.431,
619
+ "eval_steps_per_second": 16.181,
620
+ "step": 40844
621
+ },
622
+ {
623
+ "epoch": 4.015277641758887,
624
+ "grad_norm": 0.1031348779797554,
625
+ "learning_rate": 6.619984983514511e-06,
626
+ "loss": 0.0027,
627
+ "step": 41000
628
+ },
629
+ {
630
+ "epoch": 4.0642444422681425,
631
+ "grad_norm": 0.07331918925046921,
632
+ "learning_rate": 6.456762315150329e-06,
633
+ "loss": 0.0025,
634
+ "step": 41500
635
+ },
636
+ {
637
+ "epoch": 4.113211242777397,
638
+ "grad_norm": 0.10652918368577957,
639
+ "learning_rate": 6.2935396467861465e-06,
640
+ "loss": 0.0025,
641
+ "step": 42000
642
+ },
643
+ {
644
+ "epoch": 4.1621780432866515,
645
+ "grad_norm": 0.11473935097455978,
646
+ "learning_rate": 6.130316978421964e-06,
647
+ "loss": 0.0026,
648
+ "step": 42500
649
+ },
650
+ {
651
+ "epoch": 4.211144843795906,
652
+ "grad_norm": 0.0035420297645032406,
653
+ "learning_rate": 5.967094310057782e-06,
654
+ "loss": 0.0025,
655
+ "step": 43000
656
+ },
657
+ {
658
+ "epoch": 4.260111644305161,
659
+ "grad_norm": 0.01006217859685421,
660
+ "learning_rate": 5.8045245323670555e-06,
661
+ "loss": 0.0025,
662
+ "step": 43500
663
+ },
664
+ {
665
+ "epoch": 4.309078444814416,
666
+ "grad_norm": 0.09146247059106827,
667
+ "learning_rate": 5.641301864002873e-06,
668
+ "loss": 0.0022,
669
+ "step": 44000
670
+ },
671
+ {
672
+ "epoch": 4.35804524532367,
673
+ "grad_norm": 0.05604245886206627,
674
+ "learning_rate": 5.47807919563869e-06,
675
+ "loss": 0.0025,
676
+ "step": 44500
677
+ },
678
+ {
679
+ "epoch": 4.407012045832925,
680
+ "grad_norm": 0.017924629151821136,
681
+ "learning_rate": 5.314856527274508e-06,
682
+ "loss": 0.0025,
683
+ "step": 45000
684
+ },
685
+ {
686
+ "epoch": 4.45597884634218,
687
+ "grad_norm": 0.06204945594072342,
688
+ "learning_rate": 5.1516338589103255e-06,
689
+ "loss": 0.0026,
690
+ "step": 45500
691
+ },
692
+ {
693
+ "epoch": 4.504945646851435,
694
+ "grad_norm": 0.06027592718601227,
695
+ "learning_rate": 4.988411190546144e-06,
696
+ "loss": 0.0024,
697
+ "step": 46000
698
+ },
699
+ {
700
+ "epoch": 4.553912447360689,
701
+ "grad_norm": 0.26520290970802307,
702
+ "learning_rate": 4.825188522181961e-06,
703
+ "loss": 0.0022,
704
+ "step": 46500
705
+ },
706
+ {
707
+ "epoch": 4.602879247869944,
708
+ "grad_norm": 0.025959959253668785,
709
+ "learning_rate": 4.661965853817778e-06,
710
+ "loss": 0.0022,
711
+ "step": 47000
712
+ },
713
+ {
714
+ "epoch": 4.651846048379199,
715
+ "grad_norm": 0.0017334806034341455,
716
+ "learning_rate": 4.498743185453596e-06,
717
+ "loss": 0.0026,
718
+ "step": 47500
719
+ },
720
+ {
721
+ "epoch": 4.700812848888454,
722
+ "grad_norm": 0.09476437419652939,
723
+ "learning_rate": 4.335520517089414e-06,
724
+ "loss": 0.0024,
725
+ "step": 48000
726
+ },
727
+ {
728
+ "epoch": 4.749779649397708,
729
+ "grad_norm": 0.011143738403916359,
730
+ "learning_rate": 4.172297848725231e-06,
731
+ "loss": 0.0021,
732
+ "step": 48500
733
+ },
734
+ {
735
+ "epoch": 4.798746449906963,
736
+ "grad_norm": 0.10621017217636108,
737
+ "learning_rate": 4.009075180361049e-06,
738
+ "loss": 0.0023,
739
+ "step": 49000
740
+ },
741
+ {
742
+ "epoch": 4.847713250416218,
743
+ "grad_norm": 0.004438555799424648,
744
+ "learning_rate": 3.845852511996866e-06,
745
+ "loss": 0.0023,
746
+ "step": 49500
747
+ },
748
+ {
749
+ "epoch": 4.896680050925473,
750
+ "grad_norm": 0.09955357015132904,
751
+ "learning_rate": 3.682629843632684e-06,
752
+ "loss": 0.0022,
753
+ "step": 50000
754
+ },
755
+ {
756
+ "epoch": 4.945646851434727,
757
+ "grad_norm": 0.14140157401561737,
758
+ "learning_rate": 3.51973362060523e-06,
759
+ "loss": 0.0021,
760
+ "step": 50500
761
+ },
762
+ {
763
+ "epoch": 4.994613651943982,
764
+ "grad_norm": 0.06944791227579117,
765
+ "learning_rate": 3.3565109522410477e-06,
766
+ "loss": 0.0022,
767
+ "step": 51000
768
+ },
769
+ {
770
+ "epoch": 5.0,
771
+ "eval_accuracy": 0.9174413764395727,
772
+ "eval_f1": 0.9352578475336324,
773
+ "eval_loss": 0.0027621558401733637,
774
+ "eval_roc_auc": 0.9627978767199474,
775
+ "eval_runtime": 55.5741,
776
+ "eval_samples_per_second": 129.683,
777
+ "eval_steps_per_second": 16.213,
778
+ "step": 51055
779
+ },
780
+ {
781
+ "epoch": 5.043580452453237,
782
+ "grad_norm": 0.011898011900484562,
783
+ "learning_rate": 3.1932882838768654e-06,
784
+ "loss": 0.0021,
785
+ "step": 51500
786
+ },
787
+ {
788
+ "epoch": 5.0925472529624916,
789
+ "grad_norm": 0.09524281322956085,
790
+ "learning_rate": 3.0300656155126827e-06,
791
+ "loss": 0.002,
792
+ "step": 52000
793
+ },
794
+ {
795
+ "epoch": 5.141514053471746,
796
+ "grad_norm": 0.08005507290363312,
797
+ "learning_rate": 2.8668429471485004e-06,
798
+ "loss": 0.0019,
799
+ "step": 52500
800
+ },
801
+ {
802
+ "epoch": 5.190480853981001,
803
+ "grad_norm": 0.05041489377617836,
804
+ "learning_rate": 2.703946724121046e-06,
805
+ "loss": 0.0022,
806
+ "step": 53000
807
+ },
808
+ {
809
+ "epoch": 5.239447654490256,
810
+ "grad_norm": 0.001608343911357224,
811
+ "learning_rate": 2.5407240557568635e-06,
812
+ "loss": 0.0019,
813
+ "step": 53500
814
+ },
815
+ {
816
+ "epoch": 5.2884144549995105,
817
+ "grad_norm": 0.11538127809762955,
818
+ "learning_rate": 2.3775013873926812e-06,
819
+ "loss": 0.0019,
820
+ "step": 54000
821
+ },
822
+ {
823
+ "epoch": 5.337381255508765,
824
+ "grad_norm": 0.11458936333656311,
825
+ "learning_rate": 2.214278719028499e-06,
826
+ "loss": 0.002,
827
+ "step": 54500
828
+ },
829
+ {
830
+ "epoch": 5.3863480560180195,
831
+ "grad_norm": 0.07239941507577896,
832
+ "learning_rate": 2.051382496001045e-06,
833
+ "loss": 0.0021,
834
+ "step": 55000
835
+ },
836
+ {
837
+ "epoch": 5.435314856527275,
838
+ "grad_norm": 0.1313902884721756,
839
+ "learning_rate": 1.8881598276368623e-06,
840
+ "loss": 0.0018,
841
+ "step": 55500
842
+ },
843
+ {
844
+ "epoch": 5.484281657036529,
845
+ "grad_norm": 0.0023473671171814203,
846
+ "learning_rate": 1.7249371592726798e-06,
847
+ "loss": 0.0021,
848
+ "step": 56000
849
+ },
850
+ {
851
+ "epoch": 5.533248457545784,
852
+ "grad_norm": 0.09586118161678314,
853
+ "learning_rate": 1.5617144909084975e-06,
854
+ "loss": 0.0019,
855
+ "step": 56500
856
+ },
857
+ {
858
+ "epoch": 5.582215258055038,
859
+ "grad_norm": 0.08927006274461746,
860
+ "learning_rate": 1.398491822544315e-06,
861
+ "loss": 0.0019,
862
+ "step": 57000
863
+ },
864
+ {
865
+ "epoch": 5.631182058564294,
866
+ "grad_norm": 0.011845240369439125,
867
+ "learning_rate": 1.235595599516861e-06,
868
+ "loss": 0.002,
869
+ "step": 57500
870
+ },
871
+ {
872
+ "epoch": 5.680148859073548,
873
+ "grad_norm": 0.041209351271390915,
874
+ "learning_rate": 1.0723729311526786e-06,
875
+ "loss": 0.002,
876
+ "step": 58000
877
+ },
878
+ {
879
+ "epoch": 5.729115659582803,
880
+ "grad_norm": 0.09277820587158203,
881
+ "learning_rate": 9.091502627884961e-07,
882
+ "loss": 0.002,
883
+ "step": 58500
884
+ },
885
+ {
886
+ "epoch": 5.778082460092057,
887
+ "grad_norm": 0.07851295173168182,
888
+ "learning_rate": 7.459275944243137e-07,
889
+ "loss": 0.002,
890
+ "step": 59000
891
+ },
892
+ {
893
+ "epoch": 5.827049260601313,
894
+ "grad_norm": 0.046077970415353775,
895
+ "learning_rate": 5.827049260601313e-07,
896
+ "loss": 0.0018,
897
+ "step": 59500
898
+ },
899
+ {
900
+ "epoch": 5.876016061110567,
901
+ "grad_norm": 0.13472139835357666,
902
+ "learning_rate": 4.198087030326772e-07,
903
+ "loss": 0.0019,
904
+ "step": 60000
905
+ },
906
+ {
907
+ "epoch": 5.924982861619822,
908
+ "grad_norm": 0.05562426894903183,
909
+ "learning_rate": 2.5658603466849477e-07,
910
+ "loss": 0.0019,
911
+ "step": 60500
912
+ },
913
+ {
914
+ "epoch": 5.973949662129076,
915
+ "grad_norm": 0.016598107293248177,
916
+ "learning_rate": 9.368981164104072e-08,
917
+ "loss": 0.0019,
918
+ "step": 61000
919
+ },
920
+ {
921
+ "epoch": 6.0,
922
+ "eval_accuracy": 0.920632718190648,
923
+ "eval_f1": 0.9361970057366729,
924
+ "eval_loss": 0.002764922333881259,
925
+ "eval_roc_auc": 0.9639744188099952,
926
+ "eval_runtime": 55.7082,
927
+ "eval_samples_per_second": 129.37,
928
+ "eval_steps_per_second": 16.174,
929
+ "step": 61266
930
+ }
+  ],
+  "logging_steps": 500,
+  "max_steps": 61266,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 6,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 1.2909461039327232e+17,
+  "train_batch_size": 8,
+  "trial_name": null,
+  "trial_params": null
+}
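
The eval_accuracy, eval_f1 and eval_roc_auc values logged above are multi-label metrics; the best checkpoint (step 61266, epoch 6) reaches an F1 of about 0.936. A plausible compute_metrics implementation that would produce numbers of this shape is sketched below; the 0.5 threshold and micro averaging are assumptions, not taken from this commit:

```python
# Sketch of a Trainer compute_metrics for multi-label classification (assumed, not from this repo).
import numpy as np
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    probs = 1.0 / (1.0 + np.exp(-logits))        # per-label sigmoid
    preds = (probs > 0.5).astype(int)
    return {
        "accuracy": accuracy_score(labels, preds),            # exact-match accuracy
        "f1": f1_score(labels, preds, average="micro"),
        "roc_auc": roc_auc_score(labels, probs, average="micro"),
    }
```
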
training_args.bin ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a588501ad64dc7adbf0d74d74c4fd5d40991870acf2b2b7bde589c083735114d
+size 5176
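
training_args.bin is the serialized TrainingArguments object the Trainer ran with; it can be unpickled to inspect the hyperparameters behind the log above (the local path and the loading keyword are assumptions about the setup):

```python
# Sketch: inspect the saved TrainingArguments (a pickle, so it must come from a trusted source).
import torch

args = torch.load("training_args.bin", weights_only=False)  # assumed local path
print(args.num_train_epochs, args.per_device_train_batch_size, args.learning_rate)
```
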