pkarypis committed on
Commit
74d5000
·
verified ·
1 Parent(s): 2231ef9

Model save

Browse files
README.md CHANGED
@@ -2,16 +2,12 @@
2
  license: apache-2.0
3
  base_model: JackFram/llama-68m
4
  tags:
5
- - alignment-handbook
6
- - trl
7
- - sft
8
- - generated_from_trainer
9
  - trl
10
  - sft
11
  - alignment-handbook
12
  - generated_from_trainer
13
  datasets:
14
- - HuggingFaceH4/ultrachat_200k
15
  model-index:
16
  - name: gpt2-sft-port
17
  results: []
@@ -22,7 +18,7 @@ should probably proofread and complete it, then remove this comment. -->
22
 
23
  # gpt2-sft-port
24
 
25
- This model is a fine-tuned version of [JackFram/llama-68m](https://huggingface.co/JackFram/llama-68m) on the HuggingFaceH4/ultrachat_200k dataset.
26
  It achieves the following results on the evaluation set:
27
  - Loss: 2.0739
28
 
 
2
  license: apache-2.0
3
  base_model: JackFram/llama-68m
4
  tags:
 
 
 
 
5
  - trl
6
  - sft
7
  - alignment-handbook
8
  - generated_from_trainer
9
  datasets:
10
+ - generator
11
  model-index:
12
  - name: gpt2-sft-port
13
  results: []
 
18
 
19
  # gpt2-sft-port
20
 
21
+ This model is a fine-tuned version of [JackFram/llama-68m](https://huggingface.co/JackFram/llama-68m) on the generator dataset.
22
  It achieves the following results on the evaluation set:
23
  - Loss: 2.0739
24
 
all_results.json CHANGED
@@ -6,8 +6,8 @@
6
  "eval_samples_per_second": 1175.543,
7
  "eval_steps_per_second": 36.76,
8
  "train_loss": 0.007307469739229727,
9
- "train_runtime": 34.7311,
10
  "train_samples": 207865,
11
- "train_samples_per_second": 16637.979,
12
- "train_steps_per_second": 130.027
13
  }
 
6
  "eval_samples_per_second": 1175.543,
7
  "eval_steps_per_second": 36.76,
8
  "train_loss": 0.007307469739229727,
9
+ "train_runtime": 35.2869,
10
  "train_samples": 207865,
11
+ "train_samples_per_second": 16375.952,
12
+ "train_steps_per_second": 127.98
13
  }
config.json CHANGED
@@ -24,6 +24,6 @@
24
  "tie_word_embeddings": false,
25
  "torch_dtype": "bfloat16",
26
  "transformers_version": "4.38.2",
27
- "use_cache": true,
28
  "vocab_size": 32000
29
  }
 
24
  "tie_word_embeddings": false,
25
  "torch_dtype": "bfloat16",
26
  "transformers_version": "4.38.2",
27
+ "use_cache": false,
28
  "vocab_size": 32000
29
  }
runs/Apr11_17-34-44_aga39/events.out.tfevents.1712874917.aga39.1781327.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a9c7ce4dfe96a7786a3b03c83c18890d2d859cbabce49dc13569fdeb67b80fd0
3
+ size 5953
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 2.0,
3
  "train_loss": 0.007307469739229727,
4
- "train_runtime": 34.7311,
5
  "train_samples": 207865,
6
- "train_samples_per_second": 16637.979,
7
- "train_steps_per_second": 130.027
8
  }
 
1
  {
2
  "epoch": 2.0,
3
  "train_loss": 0.007307469739229727,
4
+ "train_runtime": 35.2869,
5
  "train_samples": 207865,
6
+ "train_samples_per_second": 16375.952,
7
+ "train_steps_per_second": 127.98
8
  }
trainer_state.json CHANGED
@@ -6325,21 +6325,21 @@
6325
  },
6326
  {
6327
  "epoch": 2.0,
6328
- "grad_norm": 0.5596599381401034,
6329
  "learning_rate": 3.6153063554089653e-10,
6330
  "loss": 2.0555,
6331
  "step": 4505
6332
  },
6333
  {
6334
  "epoch": 2.0,
6335
- "grad_norm": 0.571958581114088,
6336
  "learning_rate": 1.0756328901018188e-10,
6337
  "loss": 2.0598,
6338
  "step": 4510
6339
  },
6340
  {
6341
  "epoch": 2.0,
6342
- "grad_norm": 0.534656420357718,
6343
  "learning_rate": 2.987874346827013e-12,
6344
  "loss": 2.054,
6345
  "step": 4515
@@ -6347,9 +6347,9 @@
6347
  {
6348
  "epoch": 2.0,
6349
  "eval_loss": 2.0739028453826904,
6350
- "eval_runtime": 26.2622,
6351
- "eval_samples_per_second": 1217.682,
6352
- "eval_steps_per_second": 38.078,
6353
  "step": 4516
6354
  },
6355
  {
@@ -6357,9 +6357,9 @@
6357
  "step": 4516,
6358
  "total_flos": 13637863342080.0,
6359
  "train_loss": 0.007307469739229727,
6360
- "train_runtime": 34.7311,
6361
- "train_samples_per_second": 16637.979,
6362
- "train_steps_per_second": 130.027
6363
  }
6364
  ],
6365
  "logging_steps": 5,
 
6325
  },
6326
  {
6327
  "epoch": 2.0,
6328
+ "grad_norm": 0.559661682911221,
6329
  "learning_rate": 3.6153063554089653e-10,
6330
  "loss": 2.0555,
6331
  "step": 4505
6332
  },
6333
  {
6334
  "epoch": 2.0,
6335
+ "grad_norm": 0.5719606164762697,
6336
  "learning_rate": 1.0756328901018188e-10,
6337
  "loss": 2.0598,
6338
  "step": 4510
6339
  },
6340
  {
6341
  "epoch": 2.0,
6342
+ "grad_norm": 0.5346578992785624,
6343
  "learning_rate": 2.987874346827013e-12,
6344
  "loss": 2.054,
6345
  "step": 4515
 
6347
  {
6348
  "epoch": 2.0,
6349
  "eval_loss": 2.0739028453826904,
6350
+ "eval_runtime": 26.6375,
6351
+ "eval_samples_per_second": 1200.527,
6352
+ "eval_steps_per_second": 37.541,
6353
  "step": 4516
6354
  },
6355
  {
 
6357
  "step": 4516,
6358
  "total_flos": 13637863342080.0,
6359
  "train_loss": 0.007307469739229727,
6360
+ "train_runtime": 35.2869,
6361
+ "train_samples_per_second": 16375.952,
6362
+ "train_steps_per_second": 127.98
6363
  }
6364
  ],
6365
  "logging_steps": 5,
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5178cccce4b851671b9974085a18000b0059f3e1f3f9ae82135b6872a00ff77c
3
  size 6072
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:348f0659098e36c5a4f4c6183168dbfc1b21b9f74e08519031cd01e5095db1a8
3
  size 6072