AlexCuadron committed on May 5

Commit

6aa07bf

verified ·

1 Parent(s): 699f039

Upload folder using huggingface_hub

Browse files

Files changed (19) hide show

README.md +5 -6
config.json +9 -4
mergekit_config.yml +4 -3
model-00001-of-00014.safetensors +1 -1
model-00002-of-00014.safetensors +1 -1
model-00003-of-00014.safetensors +1 -1
model-00004-of-00014.safetensors +1 -1
model-00005-of-00014.safetensors +1 -1
model-00006-of-00014.safetensors +1 -1
model-00007-of-00014.safetensors +1 -1
model-00008-of-00014.safetensors +1 -1
model-00009-of-00014.safetensors +1 -1
model-00010-of-00014.safetensors +1 -1
model-00011-of-00014.safetensors +1 -1
model-00012-of-00014.safetensors +1 -1
model-00013-of-00014.safetensors +1 -1
model-00014-of-00014.safetensors +1 -1
tokenizer.json +2 -2
tokenizer_config.json +4 -3

README.md CHANGED Viewed

@@ -1,7 +1,6 @@
 ---
 base_model:
 - all-hands/openhands-lm-32b-v0.1
-- Qwen/Qwen2.5-32B
 - all-hands/openhands-lm-32b-v0.1-ep3
 library_name: transformers
 tags:
@@ -16,12 +15,11 @@ This is a merge of pre-trained language models created using [mergekit](https://
 ## Merge Details
 ### Merge Method
-This model was merged using the [Model Stock](https://arxiv.org/abs/2403.19522) merge method using [Qwen/Qwen2.5-32B](https://huggingface.co/Qwen/Qwen2.5-32B) as a base.
 ### Models Merged
 The following models were included in the merge:
-* [all-hands/openhands-lm-32b-v0.1](https://huggingface.co/all-hands/openhands-lm-32b-v0.1)
 * [all-hands/openhands-lm-32b-v0.1-ep3](https://huggingface.co/all-hands/openhands-lm-32b-v0.1-ep3)
 ### Configuration
@@ -30,10 +28,11 @@ The following YAML configuration was used to produce this model:
 ```yaml
 models:
-  - model: all-hands/openhands-lm-32b-v0.1
   - model: all-hands/openhands-lm-32b-v0.1-ep3
-base_model: Qwen/Qwen2.5-32B
-merge_method: model_stock
 dtype: float16
 ```

 ---
 base_model:
 - all-hands/openhands-lm-32b-v0.1
 - all-hands/openhands-lm-32b-v0.1-ep3
 library_name: transformers
 tags:
 ## Merge Details
 ### Merge Method
+This model was merged using the [Linear DARE](https://arxiv.org/abs/2311.03099) merge method using [all-hands/openhands-lm-32b-v0.1](https://huggingface.co/all-hands/openhands-lm-32b-v0.1) as a base.
 ### Models Merged
 The following models were included in the merge:
 * [all-hands/openhands-lm-32b-v0.1-ep3](https://huggingface.co/all-hands/openhands-lm-32b-v0.1-ep3)
 ### Configuration
 ```yaml
 models:
   - model: all-hands/openhands-lm-32b-v0.1-ep3
+    parameters:
+      weight: 0.5
+base_model: all-hands/openhands-lm-32b-v0.1
+merge_method: dare_linear
 dtype: float16
 ```

config.json CHANGED Viewed

@@ -4,19 +4,24 @@
   ],
   "attention_dropout": 0.0,
   "bos_token_id": 151643,
-  "eos_token_id": 151643,
   "hidden_act": "silu",
   "hidden_size": 5120,
   "initializer_range": 0.02,
   "intermediate_size": 27648,
-  "max_position_embeddings": 131072,
   "max_window_layers": 64,
   "model_type": "qwen2",
   "num_attention_heads": 40,
   "num_hidden_layers": 64,
   "num_key_value_heads": 8,
-  "rms_norm_eps": 1e-05,
-  "rope_scaling": null,
   "rope_theta": 1000000.0,
   "sliding_window": 131072,
   "tie_word_embeddings": false,

   ],
   "attention_dropout": 0.0,
   "bos_token_id": 151643,
+  "eos_token_id": 151645,
   "hidden_act": "silu",
   "hidden_size": 5120,
   "initializer_range": 0.02,
   "intermediate_size": 27648,
+  "max_position_embeddings": 32768,
   "max_window_layers": 64,
   "model_type": "qwen2",
   "num_attention_heads": 40,
   "num_hidden_layers": 64,
   "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": {
+    "factor": 4.0,
+    "original_max_position_embeddings": 32768,
+    "rope_type": "yarn",
+    "type": "yarn"
+  },
   "rope_theta": 1000000.0,
   "sliding_window": 131072,
   "tie_word_embeddings": false,

mergekit_config.yml CHANGED Viewed

@@ -1,6 +1,7 @@
 models:
-  - model: all-hands/openhands-lm-32b-v0.1
   - model: all-hands/openhands-lm-32b-v0.1-ep3
-base_model: Qwen/Qwen2.5-32B
-merge_method: model_stock
 dtype: float16

 models:
   - model: all-hands/openhands-lm-32b-v0.1-ep3
+    parameters:
+      weight: 0.5
+base_model: all-hands/openhands-lm-32b-v0.1
+merge_method: dare_linear
 dtype: float16

model-00001-of-00014.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bd176df6e4284ad04943e348552cabfec6c244f559d7d17b17c87422a1b4c9a4
 size 4949338448

 version https://git-lfs.github.com/spec/v1
+oid sha256:a8645db4efa02ae9042441fb4a88e5e5971a71e463e6cc509b6977da59ee8be9
 size 4949338448

model-00002-of-00014.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:81c25b388b88f65ad83f887042a9a61a8905f200142f9994210baaff18c9e165
 size 4991425912

 version https://git-lfs.github.com/spec/v1
+oid sha256:30c401176a4b279c60113f29b32cb9d414b701531847798f52376823311a78e6
 size 4991425912

model-00003-of-00014.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3d581a872134649bb7e69793d5b0050965314659e9ec8d36b5a180ffa1466f3a
 size 4876059352

 version https://git-lfs.github.com/spec/v1
+oid sha256:5c2bda70ae5fc3554d27a3874c74c5a9b966a6f2bbdb188e5487d88052f4c441
 size 4876059352

model-00004-of-00014.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e99eab5007e2861f147dd638df8c6ef0e9075df166bc72c9a602c3479465e80d
 size 4876059336

 version https://git-lfs.github.com/spec/v1
+oid sha256:6d7608449a48909f287df9fb3e2e6b5375ae65fb8a1fe9249c1049fc21173a5a
 size 4876059336

model-00005-of-00014.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:00eac163fadc5564d58a4515ea1dd1b2c150794c3b64b5b0f4662fd0c922cf2d
 size 4876059352

 version https://git-lfs.github.com/spec/v1
+oid sha256:88ee822ef866aebb3a4829ff9de61b79e7d69ce8cf2aa3df11e7ecfac49b38ce
 size 4876059352

model-00006-of-00014.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f1448b88615475a7c29e74f36987c99ce33b6ccae5f5824f07d610735783f94c
 size 4876059336

 version https://git-lfs.github.com/spec/v1
+oid sha256:753db15ed1e304b0772c7186a1b0f21cb316dc144fe45fd06d68ff4434b5ea8a
 size 4876059336

model-00007-of-00014.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e2570d1ce042a135ff02be48388869fe71f1354dfe8c5a7356437ec8d844edce
 size 4876059352

 version https://git-lfs.github.com/spec/v1
+oid sha256:a841eb296ce4fcf76190949b34a5bf26b90660f399e7ab7c1321675c9b805bc0
 size 4876059352

model-00008-of-00014.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cbc959af8a566aaf4da78b8e2d88373374936d52312b3641039acab8f73964f1
 size 4876059336

 version https://git-lfs.github.com/spec/v1
+oid sha256:5247f684063c0ca8fc7c61037baf9af8a32126419c234187fcf490d89e247e6b
 size 4876059336

model-00009-of-00014.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4d7f84526459e48d9b7fd2f551b4501d0fdd9ab52eac54f49689ac5796ac5e6d
 size 4876059352

 version https://git-lfs.github.com/spec/v1
+oid sha256:befb7d41b5acdc95feda1a9593c99181dcb0506a7a0d707ae951b232f83fd47d
 size 4876059352

model-00010-of-00014.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:20c097d4d84b503650766c582d11fbb14be0383207b0b3ad3e541eba725d7560
 size 4876059336

 version https://git-lfs.github.com/spec/v1
+oid sha256:72b4ae11bbf36e0141aae55cf07978363d81e60e97eefe10f39c383c44857abe
 size 4876059336

model-00011-of-00014.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e7baadbda4c33ac62b966d10bc2f4733836f317155c2ae5c18c72ff255c3fd01
 size 4876059352

 version https://git-lfs.github.com/spec/v1
+oid sha256:7ea4d07fd7d8a01d79d6a86c001578c3106d2b9dd849e44d32f801609f6d62c4
 size 4876059352

model-00012-of-00014.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:98ca80aa7384c177099a81ec693d926b73c1752622d4c1903d0e95b08a5d92f0
 size 4876059336

 version https://git-lfs.github.com/spec/v1
+oid sha256:6d92b0dc49a6d68347a1726751d7328acab51e8654f421e9a83a82a46cf0cdb0
 size 4876059336

model-00013-of-00014.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8fd7a37b9a44a5677b8a05093773786b2e078c33d8698cac067d6df2f86fa810
 size 4876059336

 version https://git-lfs.github.com/spec/v1
+oid sha256:9d6934850a572eaa821625525f98c1565dfb18ef17ebc8ad1391dca92b3b9450
 size 4876059336

model-00014-of-00014.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e92b133d32303f4b10d71d24558b12f3eeb6340143fab3f8545309a4db8ae28a
 size 1950423704

 version https://git-lfs.github.com/spec/v1
+oid sha256:cf19f4af75bd3bbe3e3ea8dcff703b4cef1103ff08c89f47501d281e0137bbc0
 size 1950423704

tokenizer.json CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c0382117ea329cdf097041132f6d735924b697924d6f6fc3945713e96ce87539
-size 7031645

 version https://git-lfs.github.com/spec/v1
+oid sha256:9c5ae00e602b8860cbd784ba82a8aa14e8feecec692e7076590d014d7b7fdafa
+size 11421896

tokenizer_config.json CHANGED Viewed

@@ -195,13 +195,14 @@
     "<|video_pad|>"
   ],
   "bos_token": null,
-  "chat_template": "{%- if tools %}\n    {{- '<|im_start|>system\\n' }}\n    {%- if messages[0]['role'] == 'system' %}\n        {{- messages[0]['content'] }}\n    {%- else %}\n        {{- 'You are a helpful assistant.' }}\n    {%- endif %}\n    {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n    {%- for tool in tools %}\n        {{- \"\\n\" }}\n        {{- tool | tojson }}\n    {%- endfor %}\n    {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n    {%- if messages[0]['role'] == 'system' %}\n        {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n    {%- else %}\n        {{- '<|im_start|>system\\nYou are a helpful assistant.<|im_end|>\\n' }}\n    {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n    {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n        {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n    {%- elif message.role == \"assistant\" %}\n        {{- '<|im_start|>' + message.role }}\n        {%- if message.content %}\n            {{- '\\n' + message.content }}\n        {%- endif %}\n        {%- for tool_call in message.tool_calls %}\n            {%- if tool_call.function is defined %}\n                {%- set tool_call = tool_call.function %}\n            {%- endif %}\n            {{- '\\n<tool_call>\\n{\"name\": \"' }}\n            {{- tool_call.name }}\n            {{- '\", \"arguments\": ' }}\n            {{- tool_call.arguments | tojson }}\n            {{- '}\\n</tool_call>' }}\n        {%- endfor %}\n        {{- 
'<|im_end|>\\n' }}\n    {%- elif message.role == \"tool\" %}\n        {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n            {{- '<|im_start|>user' }}\n        {%- endif %}\n        {{- '\\n<tool_response>\\n' }}\n        {{- message.content }}\n        {{- '\\n</tool_response>' }}\n        {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n            {{- '<|im_end|>\\n' }}\n        {%- endif %}\n    {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n    {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n",
   "clean_up_tokenization_spaces": false,
-  "eos_token": "<|endoftext|>",
   "errors": "replace",
   "model_max_length": 131072,
   "pad_token": "<|endoftext|>",
   "split_special_tokens": false,
   "tokenizer_class": "Qwen2Tokenizer",
   "unk_token": null
-}

     "<|video_pad|>"
   ],
   "bos_token": null,
+  "chat_template": "{%- if tools %}\n    {{- '<|im_start|>system\\n' }}\n    {%- if messages[0]['role'] == 'system' %}\n        {{- messages[0]['content'] }}\n    {%- else %}\n        {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }}\n    {%- endif %}\n    {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n    {%- for tool in tools %}\n        {{- \"\\n\" }}\n        {{- tool | tojson }}\n    {%- endfor %}\n    {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n    {%- if messages[0]['role'] == 'system' %}\n        {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n    {%- else %}\n        {{- '<|im_start|>system\\nYou are Qwen, created by Alibaba Cloud. 
You are a helpful assistant.<|im_end|>\\n' }}\n    {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n    {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n        {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n    {%- elif message.role == \"assistant\" %}\n        {{- '<|im_start|>' + message.role }}\n        {%- if message.content %}\n            {{- '\\n' + message.content }}\n        {%- endif %}\n        {%- for tool_call in message.tool_calls %}\n            {%- if tool_call.function is defined %}\n                {%- set tool_call = tool_call.function %}\n            {%- endif %}\n            {{- '\\n<tool_call>\\n{\"name\": \"' }}\n            {{- tool_call.name }}\n            {{- '\", \"arguments\": ' }}\n            {{- tool_call.arguments | tojson }}\n            {{- '}\\n</tool_call>' }}\n        {%- endfor %}\n        {{- '<|im_end|>\\n' }}\n    {%- elif message.role == \"tool\" %}\n        {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n            {{- '<|im_start|>user' }}\n        {%- endif %}\n        {{- '\\n<tool_response>\\n' }}\n        {{- message.content }}\n        {{- '\\n</tool_response>' }}\n        {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n            {{- '<|im_end|>\\n' }}\n        {%- endif %}\n    {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n    {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n",
   "clean_up_tokenization_spaces": false,
+  "eos_token": "<|im_end|>",
   "errors": "replace",
+  "extra_special_tokens": {},
   "model_max_length": 131072,
   "pad_token": "<|endoftext|>",
   "split_special_tokens": false,
   "tokenizer_class": "Qwen2Tokenizer",
   "unk_token": null
+}