Aa123564 committed
Commit d7019e6 · verified · 1 Parent(s): 454c418

Model save

README.md CHANGED
@@ -4,6 +4,7 @@ library_name: peft
 tags:
 - trl
 - sft
+- alignment-handbook
 - generated_from_trainer
 base_model: mistralai/Mistral-7B-v0.1
 datasets:
@@ -19,8 +20,6 @@ should probably proofread and complete it, then remove this comment. -->
 # zephyr-7b-sft-qlora
 
 This model is a fine-tuned version of [mistralai/Mistral-7B-v0.1](https://huggingface.co/mistralai/Mistral-7B-v0.1) on the generator dataset.
-It achieves the following results on the evaluation set:
-- Loss: 0.7922
 
 ## Model description
 
@@ -52,19 +51,15 @@ The following hyperparameters were used during training:
 - lr_scheduler_type: cosine
 - lr_scheduler_warmup_ratio: 0.1
 - num_epochs: 1
-- mixed_precision_training: Native AMP
 
 ### Training results
 
-| Training Loss | Epoch  | Step | Validation Loss |
-|:-------------:|:------:|:----:|:---------------:|
-| 0.7616        | 0.9999 | 9785 | 0.7922          |
 
 
 ### Framework versions
 
-- PEFT 0.7.1
-- Transformers 4.40.1
+- PEFT 0.9.0
+- Transformers 4.39.3
 - Pytorch 2.1.2
-- Datasets 2.19.0
-- Tokenizers 0.19.1
+- Datasets 2.18.0
+- Tokenizers 0.15.2
adapter_config.json CHANGED
@@ -20,12 +20,14 @@
   "revision": null,
   "target_modules": [
     "up_proj",
+    "q_proj",
     "k_proj",
+    "o_proj",
     "gate_proj",
     "v_proj",
-    "o_proj",
-    "down_proj",
-    "q_proj"
+    "down_proj"
   ],
-  "task_type": "CAUSAL_LM"
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
 }
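The module list is unchanged in substance; the diff only reorders it and adds the `use_dora`/`use_rslora` fields that newer peft releases serialize by default. For orientation, the updated config corresponds to a peft `LoraConfig` along these lines. This is a minimal sketch: the rank, alpha, and dropout values are placeholders, since this hunk does not show them.

```python
# Minimal sketch of the updated adapter config as a peft LoraConfig.
# r, lora_alpha, and lora_dropout are placeholders -- this hunk does not
# show them, so substitute the values from the full adapter_config.json.
from peft import LoraConfig

lora_config = LoraConfig(
    r=16,            # placeholder, not shown in this diff
    lora_alpha=32,   # placeholder, not shown in this diff
    lora_dropout=0.05,  # placeholder, not shown in this diff
    target_modules=[
        "up_proj", "q_proj", "k_proj", "o_proj",
        "gate_proj", "v_proj", "down_proj",
    ],
    task_type="CAUSAL_LM",
    use_dora=False,    # field written by newer peft releases
    use_rslora=False,  # field written by newer peft releases
)
```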
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:309f0cd223f5f25bbff9bd251fcd3f8f6d44f27ab421f1bac96fc8ac9a99b762
-size 167832240
+oid sha256:527d98212d9cdb166d3f3175fd164a01ccdfc72041f5c46b14e7a06f06267de5
+size 83946192
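The new adapter checkpoint is roughly half the previous size (83,946,192 vs 167,832,240 bytes). To try the adapter, a loading sketch along these lines should work; the repo id below is an assumption inferred from the card title and committer, so substitute the actual repository.

```python
# Minimal sketch: load the LoRA adapter on top of the base model.
# The repo id is an assumption inferred from the model card; substitute
# the actual adapter repository on the Hub.
import torch
from peft import AutoPeftModelForCausalLM
from transformers import AutoTokenizer

adapter_id = "Aa123564/zephyr-7b-sft-qlora"  # assumed repo id

model = AutoPeftModelForCausalLM.from_pretrained(
    adapter_id,
    torch_dtype=torch.bfloat16,  # assumption; pick a dtype your hardware supports
    device_map="auto",
)
tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-v0.1")
```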
all_results.json CHANGED
@@ -1,9 +1,14 @@
 {
-    "epoch": 0.9999489039905983,
-    "total_flos": 2.752048962850731e+19,
-    "train_loss": 0.9653273651900801,
-    "train_runtime": 48538.6259,
-    "train_samples": 1299087,
-    "train_samples_per_second": 6.451,
-    "train_steps_per_second": 0.202
+    "epoch": 0.99,
+    "eval_loss": 0.9485585689544678,
+    "eval_runtime": 565.6562,
+    "eval_samples": 23109,
+    "eval_samples_per_second": 27.28,
+    "eval_steps_per_second": 0.854,
+    "total_flos": 1.2254141370096157e+19,
+    "train_loss": 0.0,
+    "train_runtime": 0.0433,
+    "train_samples": 2078,
+    "train_samples_per_second": 31910.696,
+    "train_steps_per_second": 993.599
 }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
 {
-    "epoch": 2.0,
-    "eval_loss": 0.7585077881813049,
-    "eval_runtime": 7252.7987,
-    "eval_samples": 821583,
-    "eval_samples_per_second": 27.291,
-    "eval_steps_per_second": 0.853
+    "epoch": 0.9998852553069421,
+    "eval_loss": 0.9485585689544678,
+    "eval_runtime": 565.6562,
+    "eval_samples": 23109,
+    "eval_samples_per_second": 27.28,
+    "eval_steps_per_second": 0.854
 }
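Assuming `eval_loss` is the usual mean token-level cross-entropy, perplexity is its exponential, which puts this commit at roughly 2.58 versus roughly 2.14 for the values it replaces:

```python
import math

# Perplexity = exp(mean cross-entropy); values taken from eval_results.json.
print(math.exp(0.7585077881813049))  # ~2.14, previous eval_loss
print(math.exp(0.9485585689544678))  # ~2.58, eval_loss in this commit
```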
runs/May19_07-08-46_training-queue-st-p4d-24xlarge-1/events.out.tfevents.1716102745.training-queue-st-p4d-24xlarge-1.3797.0 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:326af8353171ecf25d6d218ccff10fb8bce5d9afca88ecacbd8108c2ffe2d43d
+size 5558
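The added file is a TensorBoard event log, stored as a Git LFS pointer. If you pull the actual file, it can be read with TensorBoard's event reader; a minimal sketch, assuming the tensorboard package is installed:

```python
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator

# Point at the directory (or file) containing the events.out.tfevents.* log.
ea = EventAccumulator("runs/May19_07-08-46_training-queue-st-p4d-24xlarge-1")
ea.Reload()
print(ea.Tags())  # e.g. scalar tags such as train/loss, if present
```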
tokenizer.json CHANGED
@@ -134,7 +134,6 @@
   "end_of_word_suffix": null,
   "fuse_unk": true,
   "byte_fallback": true,
-  "ignore_merges": false,
   "vocab": {
     "<unk>": 0,
     "<s>": 1,
train_results.json CHANGED
@@ -1,9 +1,8 @@
 {
-    "epoch": 0.9999489039905983,
-    "total_flos": 2.752048962850731e+19,
-    "train_loss": 0.9653273651900801,
-    "train_runtime": 48538.6259,
-    "train_samples": 1299087,
-    "train_samples_per_second": 6.451,
-    "train_steps_per_second": 0.202
+    "epoch": 0.99,
+    "train_loss": 0.0,
+    "train_runtime": 0.0433,
+    "train_samples": 2078,
+    "train_samples_per_second": 31910.696,
+    "train_steps_per_second": 993.599
 }
trainer_state.json CHANGED
The diff for this file is too large to render.
 
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:eaf129790d02402ef80ffe8262e569c9327ca62ae50ab7f0e7b51b9e9c4f0a2b
-size 5112
+oid sha256:2da86413667afdb3ef9626533f4a1f9a0770272a3f50fe1d8c7f322f97e85c2a
+size 5048
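training_args.bin is the pickled `TrainingArguments` object that transformers' `Trainer` saves alongside checkpoints, which is why its hash and size change whenever the run configuration does. A minimal sketch for inspecting it (unpickling executes arbitrary code, so only load files you trust):

```python
import torch

# Unpickle the saved TrainingArguments. weights_only=False is required on
# newer torch versions because this is a pickled Python object, not a plain
# tensor file -- only do this for files you trust.
args = torch.load("training_args.bin", weights_only=False)
print(args.learning_rate, args.num_train_epochs, args.lr_scheduler_type)
```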