Hasnonname committed on
Commit f984c77 · verified · 1 Parent(s): 8d12fd8

Update README.md

Files changed (1)
  1. README.md +1 -239
README.md CHANGED
@@ -8,242 +8,4 @@ model-index:
  - name: marigold-fft
  results: []
  ---
-
- <!-- This model card has been generated automatically according to the information the Trainer had access to. You
- should probably proofread and complete it, then remove this comment. -->
-
- [<img src="https://raw.githubusercontent.com/axolotl-ai-cloud/axolotl/main/image/axolotl-badge-web.png" alt="Built with Axolotl" width="200" height="32"/>](https://github.com/axolotl-ai-cloud/axolotl)
- <details><summary>See axolotl config</summary>
-
- axolotl version: `0.8.0.dev0`
- ```yaml
- base_model: Qwen/Qwen2.5-32B
- model_type: AutoModelForCausalLM
-
- load_in_8bit: false
- load_in_4bit: false
- strict: false
-
- chat_template: jinja
- chat_template_jinja: "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{%- if messages[0]['role'] == 'system' %}{{- '<|im_start|>system\n' + messages[0]['content'] + '<|im_end|>\n' }}{%- else %}{{- '<|im_start|>system\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>\n' }}{%- endif %}{%- for message in messages %}{%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) %}{{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }}{%- elif message.role == \"assistant\" %}{{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>\n' }}{%- endif %}{%- endfor %}{%- if add_generation_prompt %}{{- '<|im_start|>assistant\n' }}{%- endif %}"
- datasets:
-   # Continued pretrain: novels, short stories
-   - path: datasets/Sugarquill10k_Clean.jsonl
-     type: completion
-   - path: datasets/Mixed-Novels-Completions.jsonl
-     type: completion
-   - path: datasets/Mixed-Novels-Completions-2.jsonl
-     type: completion
-   - path: datasets/recursal-scp-8k-filtered-4k.jsonl
-     type: completion
-   # overfitting on disco elysium
-   - path: datasets/disco.jsonl
-     type: completion
-   - path: datasets/disco-chat.json
-     type: completion
-
-   # Instruct datasets (local)
-   - path: datasets/competition-math-sharegpt.jsonl
-     type: chat_template
-     field_messages: conversations
-     message_field_role: from
-     message_field_content: value
-   - path: datasets/systemchat-1k-sharegpt.jsonl
-     type: chat_template
-     field_messages: conversations
-     message_field_role: from
-     message_field_content: value
-   - path: datasets/opencai_rp_sharegpt.json
-     type: chat_template
-     field_messages: conversations
-     message_field_role: from
-     message_field_content: value
-   - path: datasets/floyd-instruct-8k.jsonl
-     type: chat_template
-     field_messages: conversations
-     message_field_role: from
-     message_field_content: value
-   - path: datasets/woke-identity.jsonl
-     type: chat_template
-     field_messages: conversations
-     message_field_role: from
-     message_field_content: value
-   - path: datasets/Claude-Sonnet35-Charcard-Unslop-v2.json
-     type: chat_template
-     field_messages: conversations
-     message_field_role: from
-     message_field_content: value
-   - path: datasets/kalo-opus-22k-unslop.json
-     type: chat_template
-     field_messages: conversations
-     message_field_role: from
-     message_field_content: value
-   - path: datasets/gryphe-4o-WP-sharegpt-cleaned.json
-     type: chat_template
-     field_messages: conversations
-     message_field_role: from
-     message_field_content: value
-   - path: datasets/discord-logs-cleaned-sharegpt.json
-     type: chat_template
-     field_messages: conversations
-     message_field_role: from
-     message_field_content: value
-   - path: datasets/creative-writing-multiturn-16k.json
-     type: chat_template
-     field_messages: conversations
-     message_field_role: from
-     message_field_content: value
-   - path: datasets/limo-sharegpt.jsonl
-     type: chat_template
-     field_messages: conversations
-     message_field_role: from
-     message_field_content: value
-   - path: datasets/tot-bio-2k-subset-sharegpt.json
-     type: chat_template
-     field_messages: conversations
-     message_field_role: from
-     message_field_content: value
-   - path: datasets/gpqa-benchmaxxing.jsonl
-     type: chat_template
-     field_messages: conversations
-     message_field_role: from
-     message_field_content: value
-   - path: datasets/UBW_Tapestries.json
-     type: chat_template
-     field_messages: conversations
-     message_field_role: from
-     message_field_content: value
-   - path: datasets/medical-o1-reasoning-default-sys.jsonl
-     type: chat_template
-     field_messages: conversations
-     message_field_role: from
-     message_field_content: value
-
- shuffle_merged_datasets: true
-
- special_tokens:
-   eos_token: "<|im_end|>"
-
- dataset_prepared_path: last_run_prepared
- val_set_size: 0.02
- output_dir: ./marigold-fft
-
- sequence_len: 10240 # could try 10240 too?
- sample_packing: true
- eval_sample_packing: false
- pad_to_sequence_len: true
-
- gradient_accumulation_steps: 8
- micro_batch_size: 1
- num_epochs: 3
- optimizer: paged_adamw_8bit
- lr_scheduler: cosine
- learning_rate: 5e-5
-
- weight_decay: 0.1
- max_grad_norm: 3
-
- train_on_inputs: false
- group_by_length: false
- bf16: auto
- fp16:
- tf32: false
-
- gradient_checkpointing: unsloth
- gradient_checkpointing_kwargs:
-   use_reentrant: true
- early_stopping_patience:
- resume_from_checkpoint:
- local_rank:
- logging_steps: 1
- xformers_attention:
- flash_attention: true
-
- warmup_ratio: 0.05
- evals_per_epoch: 4
- eval_table_size:
-
- saves_per_epoch: 4
- save_total_limit: 5
-
- debug:
-
- plugins:
-   - axolotl.integrations.liger.LigerPlugin
- liger_rope: true
- liger_rms_norm: true
- liger_glu_activation: true
- liger_layer_norm: true
- liger_fused_linear_cross_entropy: true
-
- deepspeed: deepspeed_configs/zero3_bf16.json # multigpu only, maybe zero3_bf16_cpuoffload_params if OOM
-
- wandb_project: Qwen2.5-32B-Marigold-v0
- wandb_entity:
- wandb_name: Marigold-v0-fft
-
- ```
-
- </details><br>
-
- # marigold-fft
-
- This model is a fine-tuned version of [Qwen/Qwen2.5-32B](https://huggingface.co/Qwen/Qwen2.5-32B) on the (insert datasets here) datasets.
- It achieves the following results on the evaluation set:
- - Loss: 2.2256
-
- ## Model description
-
- More information needed
-
- ## Intended uses & limitations
-
- More information needed
-
- ## Training and evaluation data
-
- More information needed
-
- ## Training procedure
-
- ### Training hyperparameters
-
- The following hyperparameters were used during training:
- - learning_rate: 5e-05
- - train_batch_size: 1
- - eval_batch_size: 1
- - seed: 42
- - distributed_type: multi-GPU
- - num_devices: 8
- - gradient_accumulation_steps: 8
- - total_train_batch_size: 64
- - total_eval_batch_size: 8
- - optimizer: Use paged_adamw_8bit with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
- - lr_scheduler_type: cosine
- - lr_scheduler_warmup_steps: 27
- - num_epochs: 3.0
-
- ### Training results
-
- | Training Loss | Epoch  | Step | Validation Loss |
- |:-------------:|:------:|:----:|:---------------:|
- | 2.0226        | 0.0054 | 1    | 3.5764          |
- | 1.8588        | 0.2522 | 47   | 2.2616          |
- | 1.7952        | 0.5044 | 94   | 2.1989          |
- | 1.8266        | 0.7565 | 141  | 2.1619          |
- | 1.5997        | 1.0107 | 188  | 2.1603          |
- | 1.5134        | 1.2629 | 235  | 2.1552          |
- | 1.4614        | 1.5151 | 282  | 2.1373          |
- | 1.6362        | 1.7673 | 329  | 2.1242          |
- | 1.1374        | 2.0215 | 376  | 2.2327          |
- | 1.2301        | 2.2736 | 423  | 2.2244          |
- | 1.1773        | 2.5258 | 470  | 2.2257          |
- | 1.1864        | 2.7780 | 517  | 2.2256          |
-
-
- ### Framework versions
-
- - Transformers 4.49.0
- - Pytorch 2.5.1+cu124
- - Datasets 3.2.0
- - Tokenizers 0.21.0
+ # Qwen2.5-32B-Marigold-v1-ep3