zhoukz committed on
Commit
5967491
·
verified ·
1 Parent(s): 963f924

Upload folder using huggingface_hub

Browse files
config.json CHANGED
@@ -42,16 +42,16 @@
42
  "text_config": {
43
  "attention_dropout": 0.0,
44
  "hidden_act": "silu",
45
- "hidden_size": 2048,
46
  "init_std": 0.02,
47
  "initializer_range": 0.02,
48
- "intermediate_size": 11008,
49
  "max_position_embeddings": 32768,
50
- "max_window_layers": 70,
51
  "model_type": "qwen2_5_omni_text",
52
- "num_attention_heads": 16,
53
- "num_hidden_layers": 36,
54
- "num_key_value_heads": 2,
55
  "rms_norm_eps": 1e-06,
56
  "rope_scaling": {
57
  "mrope_section": [
@@ -66,7 +66,7 @@
66
  "sliding_window": 32768,
67
  "use_cache": true,
68
  "use_sliding_window": false,
69
- "vocab_size": 151936
70
  },
71
  "torch_dtype": "float32",
72
  "transformers_version": "4.52.4"
 
42
  "text_config": {
43
  "attention_dropout": 0.0,
44
  "hidden_act": "silu",
45
+ "hidden_size": 3584,
46
  "init_std": 0.02,
47
  "initializer_range": 0.02,
48
+ "intermediate_size": 18944,
49
  "max_position_embeddings": 32768,
50
+ "max_window_layers": 28,
51
  "model_type": "qwen2_5_omni_text",
52
+ "num_attention_heads": 28,
53
+ "num_hidden_layers": 28,
54
+ "num_key_value_heads": 4,
55
  "rms_norm_eps": 1e-06,
56
  "rope_scaling": {
57
  "mrope_section": [
 
66
  "sliding_window": 32768,
67
  "use_cache": true,
68
  "use_sliding_window": false,
69
+ "vocab_size": 152064
70
  },
71
  "torch_dtype": "float32",
72
  "transformers_version": "4.52.4"
model-00001-of-00007.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:324d47a69b306b736f9c1ed9c3ac6b2f08dd25f3238e0995ca03d1f628d14d3f
3
+ size 4962055488
model-00002-of-00007.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cc9143a966742e2d38c7039d905d907211ec9b1d504786dc6cfa1655db281405
3
+ size 4932744424
model-00003-of-00007.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:61fbad4004a9b2bf23212617ddbdc92b37776a208feb85233cadac3b48cda9cf
3
+ size 4932744424
model-00004-of-00007.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:363674f4ba1fbd50dfe599ddde8c687cbb4ae95f49b56bb41ca842a60dc8fc06
3
+ size 4998852848
model-00005-of-00007.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cf4af607c8914b24b50f4adbc0e919bfb5ad8cf909d0ba02aa1145e2ee3c706e
3
+ size 4984124832
model-00006-of-00007.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:439ab29e80c237c362d9aa58f03e1fa6311ca0513613f26c077b15623bdeca22
3
+ size 4932744480
model-00007-of-00007.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d80f1068c04d6a50bc627b0e97a30ea3b97ce0609284a174a4f9795e77bf1818
3
+ size 3383846936
model.safetensors.index.json CHANGED
The diff for this file is too large to render. See raw diff
 
modeling_midashenglm.py CHANGED
@@ -1,18 +1,7 @@
1
  import collections
2
  import collections.abc
3
  from dataclasses import dataclass
4
- from typing import (
5
- Any,
6
- Callable,
7
- Iterable,
8
- List,
9
- Optional,
10
- Sequence,
11
- Tuple,
12
- Union,
13
- Unpack,
14
- cast,
15
- )
16
 
17
  import torch
18
  import torch.nn as nn
@@ -28,6 +17,7 @@ from transformers.models.qwen2_5_omni.modeling_qwen2_5_omni import (
28
  Qwen2_5OmniThinkerTextModel,
29
  )
30
  from transformers.utils import LossKwargs, can_return_tuple
 
31
 
32
  from .configuration_midashenglm import DashengConfig, MiDashengLMConfig
33
 
 
1
  import collections
2
  import collections.abc
3
  from dataclasses import dataclass
4
+ from typing import Any, Callable, Iterable, List, Optional, Sequence, Tuple, Union, cast
 
 
 
 
 
 
 
 
 
 
 
5
 
6
  import torch
7
  import torch.nn as nn
 
17
  Qwen2_5OmniThinkerTextModel,
18
  )
19
  from transformers.utils import LossKwargs, can_return_tuple
20
+ from typing_extensions import Unpack
21
 
22
  from .configuration_midashenglm import DashengConfig, MiDashengLMConfig
23
 
processing_midashenglm.py CHANGED
@@ -4,7 +4,8 @@ import numpy as np
4
  import torch
5
  from transformers import Qwen2Tokenizer, Qwen2TokenizerFast, Wav2Vec2FeatureExtractor
6
  from transformers.feature_extraction_utils import BatchFeature
7
- from transformers.processing_utils import ProcessingKwargs, ProcessorMixin, Unpack
 
8
 
9
 
10
  class MiDashengLMProcessorKwargs(ProcessingKwargs):
 
4
  import torch
5
  from transformers import Qwen2Tokenizer, Qwen2TokenizerFast, Wav2Vec2FeatureExtractor
6
  from transformers.feature_extraction_utils import BatchFeature
7
+ from transformers.processing_utils import ProcessingKwargs, ProcessorMixin
8
+ from typing_extensions import Unpack
9
 
10
 
11
  class MiDashengLMProcessorKwargs(ProcessingKwargs):