{ "activation_function": "gelu_new", "architectures": [ "GPTNeoForTokenClassification" ], "attention_dropout": 0, "attention_layers": [ "global", "local", "global", "local", "global", "local", "global", "local", "global", "local", "global", "local", "global", "local", "global", "local", "global", "local", "global", "local", "global", "local", "global", "local" ], "attention_types": [ [ [ "global", "local" ], 12 ] ], "bos_token_id": 50256, "classifier_dropout": 0.1, "embed_dropout": 0, "eos_token_id": 50256, "finetuning_task": "ner", "gradient_checkpointing": false, "hidden_size": 2048, "id2label": { "0": "I-T191", "1": "I-T038", "2": "I-T048", "3": "B-T062", "4": "O", "5": "I-T031", "6": "I-T025", "7": "I-T043", "8": "B-T031", "9": "B-T005", "10": "B-T023", "11": "B-T033", "12": "I-T026", "13": "B-T116", "14": "I-T020", "15": "B-T201", "16": "I-T024", "17": "B-T039", "18": "B-T037", "19": "B-T044", "20": "I-T170", "21": "I-T063", "22": "I-T204", "23": "B-T017", "24": "I-T167", "25": "B-T045", "26": "B-T131", "27": "I-T201", "28": "I-T103", "29": "B-T170", "30": "B-T168", "31": "B-T046", "32": "I-T037", "33": "B-T190", "34": "I-T184", "35": "I-T059", "36": "B-T058", "37": "B-T167", "38": "B-T103", "39": "B-T074", "40": "B-T024", "41": "I-T017", "42": "I-T097", "43": "I-T070", "44": "B-T196", "45": "B-T114", "46": "B-T121", "47": "B-T204", "48": "B-T040", "49": "B-T091", "50": "B-T104", "51": "I-T007", "52": "B-T070", "53": "I-T023", "54": "I-T045", "55": "B-T063", "56": "I-T058", "57": "B-T169", "58": "B-T025", "59": "I-T044", "60": "I-T033", "61": "I-T091", "62": "B-T028", "63": "I-T168", "64": "B-T184", "65": "B-T123", "66": "B-T026", "67": "B-T092", "68": "B-T191", "69": "I-T047", "70": "I-T114", "71": "I-T022", "72": "I-T125", "73": "I-T116", "74": "B-T007", "75": "B-T022", "76": "B-T080", "77": "B-T019", "78": "I-T039", "79": "B-T098", "80": "B-T109", "81": "B-T125", "82": "B-T048", "83": "B-T059", "84": "I-T190", "85": "I-T092", "86": "B-T038", "87": "I-T019", "88": "B-T097", "89": "I-T109", "90": "I-T046", "91": "I-T082", "92": "I-T062", "93": "I-T129", "94": "B-T082", "95": "I-T005", "96": "I-T028", "97": "I-T041", "98": "B-T041", "99": "B-T047", "100": "B-T197", "101": "I-T121", "102": "B-T020", "103": "I-T074", "104": "B-T043", "105": "I-T098", "106": "B-T081", "107": "B-T129" }, "initializer_range": 0.02, "intermediate_size": null, "label2id": { "B-T005": 9, "B-T007": 74, "B-T017": 23, "B-T019": 77, "B-T020": 102, "B-T022": 75, "B-T023": 10, "B-T024": 40, "B-T025": 58, "B-T026": 66, "B-T028": 62, "B-T031": 8, "B-T033": 11, "B-T037": 18, "B-T038": 86, "B-T039": 17, "B-T040": 48, "B-T041": 98, "B-T043": 104, "B-T044": 19, "B-T045": 25, "B-T046": 31, "B-T047": 99, "B-T048": 82, "B-T058": 36, "B-T059": 83, "B-T062": 3, "B-T063": 55, "B-T070": 52, "B-T074": 39, "B-T080": 76, "B-T081": 106, "B-T082": 94, "B-T091": 49, "B-T092": 67, "B-T097": 88, "B-T098": 79, "B-T103": 38, "B-T104": 50, "B-T109": 80, "B-T114": 45, "B-T116": 13, "B-T121": 46, "B-T123": 65, "B-T125": 81, "B-T129": 107, "B-T131": 26, "B-T167": 37, "B-T168": 30, "B-T169": 57, "B-T170": 29, "B-T184": 64, "B-T190": 33, "B-T191": 68, "B-T196": 44, "B-T197": 100, "B-T201": 15, "B-T204": 47, "I-T005": 95, "I-T007": 51, "I-T017": 41, "I-T019": 87, "I-T020": 14, "I-T022": 71, "I-T023": 53, "I-T024": 16, "I-T025": 6, "I-T026": 12, "I-T028": 96, "I-T031": 5, "I-T033": 60, "I-T037": 32, "I-T038": 1, "I-T039": 78, "I-T041": 97, "I-T043": 7, "I-T044": 59, "I-T045": 54, "I-T046": 90, "I-T047": 69, "I-T048": 2, "I-T058": 56, "I-T059": 35, "I-T062": 92, "I-T063": 21, "I-T070": 43, "I-T074": 103, "I-T082": 91, "I-T091": 61, "I-T092": 85, "I-T097": 42, "I-T098": 105, "I-T103": 28, "I-T109": 89, "I-T114": 70, "I-T116": 73, "I-T121": 101, "I-T125": 72, "I-T129": 93, "I-T167": 24, "I-T168": 63, "I-T170": 20, "I-T184": 34, "I-T190": 84, "I-T191": 0, "I-T201": 27, "I-T204": 22, "O": 4 }, "layer_norm_epsilon": 1e-05, "max_position_embeddings": 2048, "model_type": "gpt_neo", "num_heads": 16, "num_layers": 24, "resid_dropout": 0, "summary_activation": null, "summary_first_dropout": 0.1, "summary_proj_to_labels": true, "summary_type": "cls_index", "summary_use_proj": true, "task_specific_params": { "text-generation": { "do_sample": true, "max_length": 50, "temperature": 0.9 } }, "tokenizer_class": "GPT2Tokenizer", "torch_dtype": "float32", "transformers_version": "4.50.3", "use_cache": true, "vocab_size": 50257, "window_size": 256 }