AlexCuadron committed on
Commit
6aa07bf
·
verified ·
1 Parent(s): 699f039

Upload folder using huggingface_hub

Browse files
README.md CHANGED
@@ -1,7 +1,6 @@
1
  ---
2
  base_model:
3
  - all-hands/openhands-lm-32b-v0.1
4
- - Qwen/Qwen2.5-32B
5
  - all-hands/openhands-lm-32b-v0.1-ep3
6
  library_name: transformers
7
  tags:
@@ -16,12 +15,11 @@ This is a merge of pre-trained language models created using [mergekit](https://
16
  ## Merge Details
17
  ### Merge Method
18
 
19
- This model was merged using the [Model Stock](https://arxiv.org/abs/2403.19522) merge method using [Qwen/Qwen2.5-32B](https://huggingface.co/Qwen/Qwen2.5-32B) as a base.
20
 
21
  ### Models Merged
22
 
23
  The following models were included in the merge:
24
- * [all-hands/openhands-lm-32b-v0.1](https://huggingface.co/all-hands/openhands-lm-32b-v0.1)
25
  * [all-hands/openhands-lm-32b-v0.1-ep3](https://huggingface.co/all-hands/openhands-lm-32b-v0.1-ep3)
26
 
27
  ### Configuration
@@ -30,10 +28,11 @@ The following YAML configuration was used to produce this model:
30
 
31
  ```yaml
32
  models:
33
- - model: all-hands/openhands-lm-32b-v0.1
34
  - model: all-hands/openhands-lm-32b-v0.1-ep3
35
- base_model: Qwen/Qwen2.5-32B
36
- merge_method: model_stock
 
 
37
  dtype: float16
38
 
39
  ```
 
1
  ---
2
  base_model:
3
  - all-hands/openhands-lm-32b-v0.1
 
4
  - all-hands/openhands-lm-32b-v0.1-ep3
5
  library_name: transformers
6
  tags:
 
15
  ## Merge Details
16
  ### Merge Method
17
 
18
+ This model was merged using the [Linear DARE](https://arxiv.org/abs/2311.03099) merge method, with [all-hands/openhands-lm-32b-v0.1](https://huggingface.co/all-hands/openhands-lm-32b-v0.1) as the base model.
19
 
20
  ### Models Merged
21
 
22
  The following models were included in the merge:
 
23
  * [all-hands/openhands-lm-32b-v0.1-ep3](https://huggingface.co/all-hands/openhands-lm-32b-v0.1-ep3)
24
 
25
  ### Configuration
 
28
 
29
  ```yaml
30
  models:
 
31
  - model: all-hands/openhands-lm-32b-v0.1-ep3
32
+ parameters:
33
+ weight: 0.5
34
+ base_model: all-hands/openhands-lm-32b-v0.1
35
+ merge_method: dare_linear
36
  dtype: float16
37
 
38
  ```
config.json CHANGED
@@ -4,19 +4,24 @@
4
  ],
5
  "attention_dropout": 0.0,
6
  "bos_token_id": 151643,
7
- "eos_token_id": 151643,
8
  "hidden_act": "silu",
9
  "hidden_size": 5120,
10
  "initializer_range": 0.02,
11
  "intermediate_size": 27648,
12
- "max_position_embeddings": 131072,
13
  "max_window_layers": 64,
14
  "model_type": "qwen2",
15
  "num_attention_heads": 40,
16
  "num_hidden_layers": 64,
17
  "num_key_value_heads": 8,
18
- "rms_norm_eps": 1e-05,
19
- "rope_scaling": null,
 
 
 
 
 
20
  "rope_theta": 1000000.0,
21
  "sliding_window": 131072,
22
  "tie_word_embeddings": false,
 
4
  ],
5
  "attention_dropout": 0.0,
6
  "bos_token_id": 151643,
7
+ "eos_token_id": 151645,
8
  "hidden_act": "silu",
9
  "hidden_size": 5120,
10
  "initializer_range": 0.02,
11
  "intermediate_size": 27648,
12
+ "max_position_embeddings": 32768,
13
  "max_window_layers": 64,
14
  "model_type": "qwen2",
15
  "num_attention_heads": 40,
16
  "num_hidden_layers": 64,
17
  "num_key_value_heads": 8,
18
+ "rms_norm_eps": 1e-06,
19
+ "rope_scaling": {
20
+ "factor": 4.0,
21
+ "original_max_position_embeddings": 32768,
22
+ "rope_type": "yarn",
23
+ "type": "yarn"
24
+ },
25
  "rope_theta": 1000000.0,
26
  "sliding_window": 131072,
27
  "tie_word_embeddings": false,
mergekit_config.yml CHANGED
@@ -1,6 +1,7 @@
1
  models:
2
- - model: all-hands/openhands-lm-32b-v0.1
3
  - model: all-hands/openhands-lm-32b-v0.1-ep3
4
- base_model: Qwen/Qwen2.5-32B
5
- merge_method: model_stock
 
 
6
  dtype: float16
 
1
  models:
 
2
  - model: all-hands/openhands-lm-32b-v0.1-ep3
3
+ parameters:
4
+ weight: 0.5
5
+ base_model: all-hands/openhands-lm-32b-v0.1
6
+ merge_method: dare_linear
7
  dtype: float16
model-00001-of-00014.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bd176df6e4284ad04943e348552cabfec6c244f559d7d17b17c87422a1b4c9a4
3
  size 4949338448
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a8645db4efa02ae9042441fb4a88e5e5971a71e463e6cc509b6977da59ee8be9
3
  size 4949338448
model-00002-of-00014.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:81c25b388b88f65ad83f887042a9a61a8905f200142f9994210baaff18c9e165
3
  size 4991425912
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:30c401176a4b279c60113f29b32cb9d414b701531847798f52376823311a78e6
3
  size 4991425912
model-00003-of-00014.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3d581a872134649bb7e69793d5b0050965314659e9ec8d36b5a180ffa1466f3a
3
  size 4876059352
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5c2bda70ae5fc3554d27a3874c74c5a9b966a6f2bbdb188e5487d88052f4c441
3
  size 4876059352
model-00004-of-00014.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e99eab5007e2861f147dd638df8c6ef0e9075df166bc72c9a602c3479465e80d
3
  size 4876059336
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6d7608449a48909f287df9fb3e2e6b5375ae65fb8a1fe9249c1049fc21173a5a
3
  size 4876059336
model-00005-of-00014.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:00eac163fadc5564d58a4515ea1dd1b2c150794c3b64b5b0f4662fd0c922cf2d
3
  size 4876059352
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:88ee822ef866aebb3a4829ff9de61b79e7d69ce8cf2aa3df11e7ecfac49b38ce
3
  size 4876059352
model-00006-of-00014.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f1448b88615475a7c29e74f36987c99ce33b6ccae5f5824f07d610735783f94c
3
  size 4876059336
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:753db15ed1e304b0772c7186a1b0f21cb316dc144fe45fd06d68ff4434b5ea8a
3
  size 4876059336
model-00007-of-00014.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e2570d1ce042a135ff02be48388869fe71f1354dfe8c5a7356437ec8d844edce
3
  size 4876059352
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a841eb296ce4fcf76190949b34a5bf26b90660f399e7ab7c1321675c9b805bc0
3
  size 4876059352
model-00008-of-00014.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cbc959af8a566aaf4da78b8e2d88373374936d52312b3641039acab8f73964f1
3
  size 4876059336
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5247f684063c0ca8fc7c61037baf9af8a32126419c234187fcf490d89e247e6b
3
  size 4876059336
model-00009-of-00014.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4d7f84526459e48d9b7fd2f551b4501d0fdd9ab52eac54f49689ac5796ac5e6d
3
  size 4876059352
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:befb7d41b5acdc95feda1a9593c99181dcb0506a7a0d707ae951b232f83fd47d
3
  size 4876059352
model-00010-of-00014.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:20c097d4d84b503650766c582d11fbb14be0383207b0b3ad3e541eba725d7560
3
  size 4876059336
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:72b4ae11bbf36e0141aae55cf07978363d81e60e97eefe10f39c383c44857abe
3
  size 4876059336
model-00011-of-00014.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e7baadbda4c33ac62b966d10bc2f4733836f317155c2ae5c18c72ff255c3fd01
3
  size 4876059352
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7ea4d07fd7d8a01d79d6a86c001578c3106d2b9dd849e44d32f801609f6d62c4
3
  size 4876059352
model-00012-of-00014.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:98ca80aa7384c177099a81ec693d926b73c1752622d4c1903d0e95b08a5d92f0
3
  size 4876059336
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6d92b0dc49a6d68347a1726751d7328acab51e8654f421e9a83a82a46cf0cdb0
3
  size 4876059336
model-00013-of-00014.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8fd7a37b9a44a5677b8a05093773786b2e078c33d8698cac067d6df2f86fa810
3
  size 4876059336
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9d6934850a572eaa821625525f98c1565dfb18ef17ebc8ad1391dca92b3b9450
3
  size 4876059336
model-00014-of-00014.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e92b133d32303f4b10d71d24558b12f3eeb6340143fab3f8545309a4db8ae28a
3
  size 1950423704
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cf19f4af75bd3bbe3e3ea8dcff703b4cef1103ff08c89f47501d281e0137bbc0
3
  size 1950423704
tokenizer.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c0382117ea329cdf097041132f6d735924b697924d6f6fc3945713e96ce87539
3
- size 7031645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9c5ae00e602b8860cbd784ba82a8aa14e8feecec692e7076590d014d7b7fdafa
3
+ size 11421896
tokenizer_config.json CHANGED
@@ -195,13 +195,14 @@
195
  "<|video_pad|>"
196
  ],
197
  "bos_token": null,
198
- "chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0]['role'] == 'system' %}\n {{- messages[0]['content'] }}\n {%- else %}\n {{- 'You are a helpful assistant.' }}\n {%- endif %}\n {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0]['role'] == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n {%- else %}\n {{- '<|im_start|>system\\nYou are a helpful assistant.<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {{- '<|im_start|>' + message.role }}\n {%- if message.content %}\n {{- '\\n' + message.content }}\n {%- endif %}\n {%- for tool_call in message.tool_calls %}\n {%- if tool_call.function is defined %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '\\n<tool_call>\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- '}\\n</tool_call>' }}\n {%- endfor %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n<tool_response>\\n' }}\n {{- message.content 
}}\n {{- '\\n</tool_response>' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n",
199
  "clean_up_tokenization_spaces": false,
200
- "eos_token": "<|endoftext|>",
201
  "errors": "replace",
 
202
  "model_max_length": 131072,
203
  "pad_token": "<|endoftext|>",
204
  "split_special_tokens": false,
205
  "tokenizer_class": "Qwen2Tokenizer",
206
  "unk_token": null
207
- }
 
195
  "<|video_pad|>"
196
  ],
197
  "bos_token": null,
198
+ "chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0]['role'] == 'system' %}\n {{- messages[0]['content'] }}\n {%- else %}\n {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }}\n {%- endif %}\n {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0]['role'] == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n {%- else %}\n {{- '<|im_start|>system\\nYou are Qwen, created by Alibaba Cloud. 
You are a helpful assistant.<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {{- '<|im_start|>' + message.role }}\n {%- if message.content %}\n {{- '\\n' + message.content }}\n {%- endif %}\n {%- for tool_call in message.tool_calls %}\n {%- if tool_call.function is defined %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '\\n<tool_call>\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- '}\\n</tool_call>' }}\n {%- endfor %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n<tool_response>\\n' }}\n {{- message.content }}\n {{- '\\n</tool_response>' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n",
199
  "clean_up_tokenization_spaces": false,
200
+ "eos_token": "<|im_end|>",
201
  "errors": "replace",
202
+ "extra_special_tokens": {},
203
  "model_max_length": 131072,
204
  "pad_token": "<|endoftext|>",
205
  "split_special_tokens": false,
206
  "tokenizer_class": "Qwen2Tokenizer",
207
  "unk_token": null
208
+ }