Upload 13 files

Browse files

Files changed (12) hide show

adapter_config.json +28 -28
adapter_model.safetensors +1 -1
added_tokens.json +3 -0
optimizer.pt +2 -2
rng_state.pth +1 -1
scheduler.pt +1 -1
special_tokens_map.json +24 -24
tokenizer.json +0 -0
tokenizer.model +3 -0
tokenizer_config.json +49 -49
trainer_state.json +313 -173
training_args.bin +1 -1

adapter_config.json CHANGED Viewed

@@ -1,29 +1,29 @@
-{
-  "alpha_pattern": {},
-  "auto_mapping": null,
-  "base_model_name_or_path": "NousResearch/Llama-2-7b-chat-hf",
-  "bias": "none",
-  "fan_in_fan_out": false,
-  "inference_mode": true,
-  "init_lora_weights": true,
-  "layer_replication": null,
-  "layers_pattern": null,
-  "layers_to_transform": null,
-  "loftq_config": {},
-  "lora_alpha": 16,
-  "lora_dropout": 0.1,
-  "megatron_config": null,
-  "megatron_core": "megatron.core",
-  "modules_to_save": null,
-  "peft_type": "LORA",
-  "r": 64,
-  "rank_pattern": {},
-  "revision": null,
-  "target_modules": [
-    "v_proj",
-    "q_proj"
-  ],
-  "task_type": "CAUSAL_LM",
-  "use_dora": false,
-  "use_rslora": false
 }

+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "NousResearch/Llama-2-7b-chat-hf",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 16,
+  "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 64,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
 }

adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f8e3340167502f2b41f4de90f4c3a53c964f4bc187b3f7fd63df370aec634d98
 size 134235048

 version https://git-lfs.github.com/spec/v1
+oid sha256:a40e83b6ec10c3e18d82e9aae7fd1bd5640d73c4a12f9cd272527e448f237650
 size 134235048

added_tokens.json ADDED Viewed

	@@ -0,0 +1,3 @@

+{
+  "<pad>": 32000
+}

optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bcf2a037e3398760b7b14ac4d5c81a12255a5ff5a28f81b1ea7c6b44f46b6bcf
-size 68312954

 version https://git-lfs.github.com/spec/v1
+oid sha256:6877126e4b893a0d6e5f0d8230d54a065c0a9028231747d7bdae69d2f0938025
+size 268515066

rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:92f29cd701c5a72a5a0f87abaff2371b8a7db57ea8841275e79535daba928715
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:31f94440a84de8dcb7e3dc56f62912fe1cffa12753d6b0c3c9602e4effda4eec
 size 14244

scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:52c4adb36f7c6ddb151663ae4716f16b16c64d8383b00e12592d7abb8b5accfc
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:19bcbcfd505eb4fc1a12c7d832a77268d82b4a9b82157e01d48b5fb1cf5fbcc1
 size 1064

special_tokens_map.json CHANGED Viewed

@@ -1,24 +1,24 @@
-{
-  "bos_token": {
-    "content": "<s>",
-    "lstrip": false,
-    "normalized": true,
-    "rstrip": false,
-    "single_word": false
-  },
-  "eos_token": {
-    "content": "</s>",
-    "lstrip": false,
-    "normalized": true,
-    "rstrip": false,
-    "single_word": false
-  },
-  "pad_token": "</s>",
-  "unk_token": {
-    "content": "<unk>",
-    "lstrip": false,
-    "normalized": true,
-    "rstrip": false,
-    "single_word": false
-  }
-}

+{
+  "bos_token": {
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": "</s>",
+  "unk_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  }
+}

tokenizer.json CHANGED Viewed

The diff for this file is too large to render. See raw diff

tokenizer.model ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
+size 499723

tokenizer_config.json CHANGED Viewed

@@ -1,49 +1,49 @@
-{
-  "add_bos_token": true,
-  "add_eos_token": false,
-  "add_prefix_space": null,
-  "added_tokens_decoder": {
-    "0": {
-      "content": "<unk>",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "1": {
-      "content": "<s>",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "2": {
-      "content": "</s>",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "32000": {
-      "content": "<pad>",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    }
-  },
-  "bos_token": "<s>",
-  "clean_up_tokenization_spaces": false,
-  "eos_token": "</s>",
-  "legacy": false,
-  "model_max_length": 1000000000000000019884624838656,
-  "pad_token": "</s>",
-  "sp_model_kwargs": {},
-  "tokenizer_class": "LlamaTokenizer",
-  "unk_token": "<unk>",
-  "use_default_system_prompt": false
-}

+{
+  "add_bos_token": true,
+  "add_eos_token": false,
+  "add_prefix_space": null,
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<s>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "32000": {
+      "content": "<pad>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    }
+  },
+  "bos_token": "<s>",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "</s>",
+  "legacy": false,
+  "model_max_length": 1000000000000000019884624838656,
+  "pad_token": "</s>",
+  "sp_model_kwargs": {},
+  "tokenizer_class": "LlamaTokenizer",
+  "unk_token": "<unk>",
+  "use_default_system_prompt": false
+}

trainer_state.json CHANGED Viewed

@@ -1,173 +1,313 @@
-{
-  "best_metric": null,
-  "best_model_checkpoint": null,
-  "epoch": 0.001779061865097297,
-  "eval_steps": 500,
-  "global_step": 500,
-  "is_hyper_param_search": false,
-  "is_local_process_zero": true,
-  "is_world_process_zero": true,
-  "log_history": [
-    {
-      "epoch": 8.895309325486485e-05,
-      "grad_norm": 0.5721328258514404,
-      "learning_rate": 2.5e-05,
-      "loss": 2.8022,
-      "step": 25
-    },
-    {
-      "epoch": 0.0001779061865097297,
-      "grad_norm": 0.8923015594482422,
-      "learning_rate": 2.5e-05,
-      "loss": 2.7907,
-      "step": 50
-    },
-    {
-      "epoch": 0.0002668592797645945,
-      "grad_norm": 0.4010128676891327,
-      "learning_rate": 2.5e-05,
-      "loss": 2.2071,
-      "step": 75
-    },
-    {
-      "epoch": 0.0003558123730194594,
-      "grad_norm": 1.1923198699951172,
-      "learning_rate": 2.5e-05,
-      "loss": 2.0296,
-      "step": 100
-    },
-    {
-      "epoch": 0.00044476546627432423,
-      "grad_norm": 0.44945457577705383,
-      "learning_rate": 2.5e-05,
-      "loss": 1.6782,
-      "step": 125
-    },
-    {
-      "epoch": 0.000533718559529189,
-      "grad_norm": 1.238482117652893,
-      "learning_rate": 2.5e-05,
-      "loss": 1.3948,
-      "step": 150
-    },
-    {
-      "epoch": 0.0006226716527840539,
-      "grad_norm": 1.2721318006515503,
-      "learning_rate": 2.5e-05,
-      "loss": 1.4995,
-      "step": 175
-    },
-    {
-      "epoch": 0.0007116247460389188,
-      "grad_norm": 0.647321343421936,
-      "learning_rate": 2.5e-05,
-      "loss": 1.5624,
-      "step": 200
-    },
-    {
-      "epoch": 0.0008005778392937836,
-      "grad_norm": 2.433274984359741,
-      "learning_rate": 2.5e-05,
-      "loss": 1.6035,
-      "step": 225
-    },
-    {
-      "epoch": 0.0008895309325486485,
-      "grad_norm": 0.5926241278648376,
-      "learning_rate": 2.5e-05,
-      "loss": 1.4234,
-      "step": 250
-    },
-    {
-      "epoch": 0.0009784840258035134,
-      "grad_norm": 1.6820074319839478,
-      "learning_rate": 2.5e-05,
-      "loss": 1.1803,
-      "step": 275
-    },
-    {
-      "epoch": 0.001067437119058378,
-      "grad_norm": 1.5319762229919434,
-      "learning_rate": 2.5e-05,
-      "loss": 1.3767,
-      "step": 300
-    },
-    {
-      "epoch": 0.001156390212313243,
-      "grad_norm": 0.5725632309913635,
-      "learning_rate": 2.5e-05,
-      "loss": 1.245,
-      "step": 325
-    },
-    {
-      "epoch": 0.0012453433055681078,
-      "grad_norm": 0.7384375929832458,
-      "learning_rate": 2.5e-05,
-      "loss": 1.2259,
-      "step": 350
-    },
-    {
-      "epoch": 0.0013342963988229727,
-      "grad_norm": 0.5739320516586304,
-      "learning_rate": 2.5e-05,
-      "loss": 1.2383,
-      "step": 375
-    },
-    {
-      "epoch": 0.0014232494920778376,
-      "grad_norm": 0.962674617767334,
-      "learning_rate": 2.5e-05,
-      "loss": 1.2162,
-      "step": 400
-    },
-    {
-      "epoch": 0.0015122025853327023,
-      "grad_norm": 1.2786195278167725,
-      "learning_rate": 2.5e-05,
-      "loss": 1.224,
-      "step": 425
-    },
-    {
-      "epoch": 0.0016011556785875672,
-      "grad_norm": 1.3110697269439697,
-      "learning_rate": 2.5e-05,
-      "loss": 1.3172,
-      "step": 450
-    },
-    {
-      "epoch": 0.001690108771842432,
-      "grad_norm": 0.6006544828414917,
-      "learning_rate": 2.5e-05,
-      "loss": 1.3636,
-      "step": 475
-    },
-    {
-      "epoch": 0.001779061865097297,
-      "grad_norm": 0.6102003455162048,
-      "learning_rate": 2.5e-05,
-      "loss": 1.1117,
-      "step": 500
-    }
-  ],
-  "logging_steps": 25,
-  "max_steps": 500,
-  "num_input_tokens_seen": 0,
-  "num_train_epochs": 1,
-  "save_steps": 50,
-  "stateful_callbacks": {
-    "TrainerControl": {
-      "args": {
-        "should_epoch_stop": false,
-        "should_evaluate": false,
-        "should_log": false,
-        "should_save": true,
-        "should_training_stop": true
-      },
-      "attributes": {}
-    }
-  },
-  "total_flos": 1.0200419401728e+16,
-  "train_batch_size": 1,
-  "trial_name": null,
-  "trial_params": null
-}

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 1.0,
+  "eval_steps": 500,
+  "global_step": 1000,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.025,
+      "grad_norm": 0.2357209473848343,
+      "learning_rate": 0.0002,
+      "loss": 1.4572,
+      "step": 25
+    },
+    {
+      "epoch": 0.05,
+      "grad_norm": 4.098695278167725,
+      "learning_rate": 0.0002,
+      "loss": 1.6134,
+      "step": 50
+    },
+    {
+      "epoch": 0.075,
+      "grad_norm": 0.36763498187065125,
+      "learning_rate": 0.0002,
+      "loss": 1.1204,
+      "step": 75
+    },
+    {
+      "epoch": 0.1,
+      "grad_norm": 3.215606927871704,
+      "learning_rate": 0.0002,
+      "loss": 1.6429,
+      "step": 100
+    },
+    {
+      "epoch": 0.125,
+      "grad_norm": 0.24122707545757294,
+      "learning_rate": 0.0002,
+      "loss": 1.1536,
+      "step": 125
+    },
+    {
+      "epoch": 0.15,
+      "grad_norm": 1.240979552268982,
+      "learning_rate": 0.0002,
+      "loss": 1.5328,
+      "step": 150
+    },
+    {
+      "epoch": 0.175,
+      "grad_norm": 0.5094348788261414,
+      "learning_rate": 0.0002,
+      "loss": 1.1057,
+      "step": 175
+    },
+    {
+      "epoch": 0.2,
+      "grad_norm": 1.9274498224258423,
+      "learning_rate": 0.0002,
+      "loss": 1.4537,
+      "step": 200
+    },
+    {
+      "epoch": 0.225,
+      "grad_norm": 0.22062620520591736,
+      "learning_rate": 0.0002,
+      "loss": 1.0565,
+      "step": 225
+    },
+    {
+      "epoch": 0.25,
+      "grad_norm": 1.489973545074463,
+      "learning_rate": 0.0002,
+      "loss": 1.3707,
+      "step": 250
+    },
+    {
+      "epoch": 0.275,
+      "grad_norm": 0.2041197568178177,
+      "learning_rate": 0.0002,
+      "loss": 1.1186,
+      "step": 275
+    },
+    {
+      "epoch": 0.3,
+      "grad_norm": 0.5328978896141052,
+      "learning_rate": 0.0002,
+      "loss": 1.5287,
+      "step": 300
+    },
+    {
+      "epoch": 0.325,
+      "grad_norm": 0.2129279226064682,
+      "learning_rate": 0.0002,
+      "loss": 1.3442,
+      "step": 325
+    },
+    {
+      "epoch": 0.35,
+      "grad_norm": 0.42091837525367737,
+      "learning_rate": 0.0002,
+      "loss": 1.4075,
+      "step": 350
+    },
+    {
+      "epoch": 0.375,
+      "grad_norm": 0.13680295646190643,
+      "learning_rate": 0.0002,
+      "loss": 1.2503,
+      "step": 375
+    },
+    {
+      "epoch": 0.4,
+      "grad_norm": 0.8510954976081848,
+      "learning_rate": 0.0002,
+      "loss": 1.3453,
+      "step": 400
+    },
+    {
+      "epoch": 0.425,
+      "grad_norm": 0.19430848956108093,
+      "learning_rate": 0.0002,
+      "loss": 1.1908,
+      "step": 425
+    },
+    {
+      "epoch": 0.45,
+      "grad_norm": 0.6921439170837402,
+      "learning_rate": 0.0002,
+      "loss": 1.3728,
+      "step": 450
+    },
+    {
+      "epoch": 0.475,
+      "grad_norm": 0.2126462459564209,
+      "learning_rate": 0.0002,
+      "loss": 1.1244,
+      "step": 475
+    },
+    {
+      "epoch": 0.5,
+      "grad_norm": 0.6232830882072449,
+      "learning_rate": 0.0002,
+      "loss": 1.2055,
+      "step": 500
+    },
+    {
+      "epoch": 0.525,
+      "grad_norm": 0.20547091960906982,
+      "learning_rate": 0.0002,
+      "loss": 1.1497,
+      "step": 525
+    },
+    {
+      "epoch": 0.55,
+      "grad_norm": 0.489362508058548,
+      "learning_rate": 0.0002,
+      "loss": 1.2601,
+      "step": 550
+    },
+    {
+      "epoch": 0.575,
+      "grad_norm": 0.1969325989484787,
+      "learning_rate": 0.0002,
+      "loss": 1.1765,
+      "step": 575
+    },
+    {
+      "epoch": 0.6,
+      "grad_norm": 0.6766378879547119,
+      "learning_rate": 0.0002,
+      "loss": 1.5613,
+      "step": 600
+    },
+    {
+      "epoch": 0.625,
+      "grad_norm": 0.13454963266849518,
+      "learning_rate": 0.0002,
+      "loss": 1.2215,
+      "step": 625
+    },
+    {
+      "epoch": 0.65,
+      "grad_norm": 0.32630065083503723,
+      "learning_rate": 0.0002,
+      "loss": 1.3704,
+      "step": 650
+    },
+    {
+      "epoch": 0.675,
+      "grad_norm": 0.2560375928878784,
+      "learning_rate": 0.0002,
+      "loss": 1.0987,
+      "step": 675
+    },
+    {
+      "epoch": 0.7,
+      "grad_norm": 1.6413543224334717,
+      "learning_rate": 0.0002,
+      "loss": 1.5937,
+      "step": 700
+    },
+    {
+      "epoch": 0.725,
+      "grad_norm": 0.2766744792461395,
+      "learning_rate": 0.0002,
+      "loss": 1.1834,
+      "step": 725
+    },
+    {
+      "epoch": 0.75,
+      "grad_norm": 0.9574869275093079,
+      "learning_rate": 0.0002,
+      "loss": 1.4304,
+      "step": 750
+    },
+    {
+      "epoch": 0.775,
+      "grad_norm": 0.13615752756595612,
+      "learning_rate": 0.0002,
+      "loss": 1.1135,
+      "step": 775
+    },
+    {
+      "epoch": 0.8,
+      "grad_norm": 0.6256314516067505,
+      "learning_rate": 0.0002,
+      "loss": 1.3999,
+      "step": 800
+    },
+    {
+      "epoch": 0.825,
+      "grad_norm": 0.2913402020931244,
+      "learning_rate": 0.0002,
+      "loss": 1.0998,
+      "step": 825
+    },
+    {
+      "epoch": 0.85,
+      "grad_norm": 0.8512757420539856,
+      "learning_rate": 0.0002,
+      "loss": 1.5836,
+      "step": 850
+    },
+    {
+      "epoch": 0.875,
+      "grad_norm": 0.1915389448404312,
+      "learning_rate": 0.0002,
+      "loss": 1.1269,
+      "step": 875
+    },
+    {
+      "epoch": 0.9,
+      "grad_norm": 0.6881994605064392,
+      "learning_rate": 0.0002,
+      "loss": 1.4682,
+      "step": 900
+    },
+    {
+      "epoch": 0.925,
+      "grad_norm": 0.15422898530960083,
+      "learning_rate": 0.0002,
+      "loss": 1.0977,
+      "step": 925
+    },
+    {
+      "epoch": 0.95,
+      "grad_norm": 0.775578498840332,
+      "learning_rate": 0.0002,
+      "loss": 1.6679,
+      "step": 950
+    },
+    {
+      "epoch": 0.975,
+      "grad_norm": 0.28175973892211914,
+      "learning_rate": 0.0002,
+      "loss": 1.2866,
+      "step": 975
+    },
+    {
+      "epoch": 1.0,
+      "grad_norm": 0.5729475021362305,
+      "learning_rate": 0.0002,
+      "loss": 1.3361,
+      "step": 1000
+    }
+  ],
+  "logging_steps": 25,
+  "max_steps": 1000,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 25,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 1.679542884421632e+16,
+  "train_batch_size": 1,
+  "trial_name": null,
+  "trial_params": null
+}

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e2dd63cd40626facaab305a42f6defc2bbcbeebadb21704948ac692f5f455b95
 size 5432

 version https://git-lfs.github.com/spec/v1
+oid sha256:ced240fd53834fa3700f173685f9aeeb3a5d2ca39269ec93a8f139d341c48537
 size 5432