Alphatao committed
Commit 227ddae · verified · 1 parent: bb1066c

Training in progress, step 50

adapter_config.json ADDED
@@ -0,0 +1,39 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": null,
+  "bias": "none",
+  "corda_config": null,
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": null,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_bias": false,
+  "lora_dropout": 0.05,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 16,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "down_proj",
+    "k_proj",
+    "v_proj",
+    "o_proj",
+    "up_proj",
+    "gate_proj",
+    "q_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "trainable_token_indices": null,
+  "use_dora": false,
+  "use_rslora": false
+}
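This adapter config describes a rank-16 LoRA with alpha 32 and 0.05 dropout on every attention and MLP projection. For reference, a minimal `peft` sketch that reproduces it (the keys above are `LoraConfig`'s serialized arguments):

```python
from peft import LoraConfig

# Rank-16 LoRA on all attention and MLP projections, matching
# the adapter_config.json added in this commit.
lora_config = LoraConfig(
    r=16,                # LoRA rank ("r")
    lora_alpha=32,       # scaling: effective update is (alpha / r) * B @ A
    lora_dropout=0.05,   # dropout on the LoRA branch during training
    bias="none",         # no bias terms are adapted
    task_type="CAUSAL_LM",
    target_modules=[
        "q_proj", "k_proj", "v_proj", "o_proj",  # attention projections
        "gate_proj", "up_proj", "down_proj",     # MLP projections
    ],
)
```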
adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e6fb2c47944fd2f4753270ee8c6d1227502fbbae8e6e3288d2bf4ea334ad6b88
+size 228150120
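The file checked in here is a Git LFS pointer, not the weights themselves: it records only the SHA-256 of the real object and its size in bytes. A downloaded copy can be checked against the pointer like so (a minimal sketch; the local path is assumed):

```python
import hashlib

path = "adapter_model.safetensors"  # assumed local path to the downloaded weights
expected_oid = "e6fb2c47944fd2f4753270ee8c6d1227502fbbae8e6e3288d2bf4ea334ad6b88"
expected_size = 228150120

h, size = hashlib.sha256(), 0
with open(path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks
        h.update(chunk)
        size += len(chunk)

assert size == expected_size, f"size mismatch: {size} != {expected_size}"
assert h.hexdigest() == expected_oid, "sha256 mismatch"
```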
config.json CHANGED
@@ -1,37 +1,29 @@
 {
+  "_attn_implementation_autoset": true,
   "architectures": [
-    "LlamaForCausalLM"
+    "MistralForCausalLM"
   ],
-  "attention_bias": false,
   "attention_dropout": 0.0,
-  "bos_token_id": 128000,
-  "eos_token_id": 128009,
+  "bos_token_id": 1,
+  "eos_token_id": 2,
   "head_dim": 128,
   "hidden_act": "silu",
-  "hidden_size": 4096,
+  "hidden_size": 5120,
   "initializer_range": 0.02,
   "intermediate_size": 14336,
   "max_position_embeddings": 131072,
-  "mlp_bias": false,
-  "model_type": "llama",
+  "model_type": "mistral",
   "num_attention_heads": 32,
-  "num_hidden_layers": 32,
+  "num_hidden_layers": 40,
   "num_key_value_heads": 8,
-  "pad_token_id": 128004,
-  "pretraining_tp": 1,
+  "pad_token_id": 10,
   "rms_norm_eps": 1e-05,
-  "rope_scaling": {
-    "factor": 8.0,
-    "high_freq_factor": 4.0,
-    "low_freq_factor": 1.0,
-    "original_max_position_embeddings": 8192,
-    "rope_type": "llama3"
-  },
-  "rope_theta": 500000.0,
+  "rope_theta": 1000000.0,
+  "sliding_window": null,
   "tie_word_embeddings": false,
   "torch_dtype": "bfloat16",
   "transformers_version": "4.51.3",
   "unsloth_fixed": true,
   "use_cache": false,
-  "vocab_size": 128256
+  "vocab_size": 131072
 }
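The base config changes from a Llama-3-style checkpoint (hidden_size 4096, 32 layers, llama3 RoPE scaling, 128K vocab) to a Mistral-style one (hidden_size 5120, 40 layers, rope_theta 1e6, 131K vocab). Because `base_model_name_or_path` is null in `adapter_config.json`, the base checkpoint has to be supplied explicitly when loading the adapter; a minimal sketch with assumed repo ids:

```python
import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

base_id = "path/to/mistral-base"   # assumed: adapter_config.json leaves this null
adapter_id = "path/to/this-repo"   # assumed: the repo this commit belongs to

tokenizer = AutoTokenizer.from_pretrained(adapter_id)
base = AutoModelForCausalLM.from_pretrained(base_id, torch_dtype=torch.bfloat16)
model = PeftModel.from_pretrained(base, adapter_id)  # attaches the LoRA weights
```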
runs/May14_10-03-19_ebc80799ced7/events.out.tfevents.1747217007.ebc80799ced7.317.0 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:040694dfe09a955942032e0894affd5eba6190e9ea0cf3199e4e584ea2ddb549
+size 95184
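This is the TensorBoard event file written during the run (checkpointed here at step 50). After cloning the repo, the logged scalars can be read back with TensorBoard's event reader; a sketch, where the `train/loss` tag is an assumption about what the trainer logged:

```python
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator

ea = EventAccumulator("runs/May14_10-03-19_ebc80799ced7")
ea.Reload()                          # parse the events file
print(ea.Tags()["scalars"])          # list the scalar tags actually logged
for e in ea.Scalars("train/loss"):   # assumed tag name
    print(e.step, e.value)
```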
special_tokens_map.json CHANGED
@@ -1,20 +1,27 @@
 {
   "bos_token": {
-    "content": "<|begin_of_text|>",
+    "content": "<s>",
     "lstrip": false,
     "normalized": false,
     "rstrip": false,
     "single_word": false
   },
   "eos_token": {
-    "content": "<|eot_id|>",
+    "content": "</s>",
     "lstrip": false,
     "normalized": false,
     "rstrip": false,
     "single_word": false
   },
   "pad_token": {
-    "content": "<|finetune_right_pad_id|>",
+    "content": "<pad>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "<unk>",
     "lstrip": false,
     "normalized": false,
     "rstrip": false,
tokenizer.json CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a65c6c5f9764771aa485e6a1f5e63d7d9af8477fe0777148c17476ecb2e09a05
-size 17210099
+oid sha256:8bf3b3d6214dce22c2aa088e17fd3d059bb7069b0b1c8098ace2867d4b96b20a
+size 17078447
tokenizer_config.json CHANGED
The diff for this file is too large to render. See raw diff
 
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ff6285bdabe274e5aeca6214ff1535bead1951e6928a03913d8d6340a406062f
+oid sha256:226b438e87695b85d63c151ba818b85c53ddf1a754680309eee987646ce50f52
 size 7864