drwlf committed
Commit d2da85a · verified · 1 Parent(s): 51708d8

Add abliterated Medra4b model

config.json CHANGED
@@ -10,7 +10,6 @@
   "mm_tokens_per_image": 256,
   "model_type": "gemma3",
   "text_config": {
-    "_sliding_window_pattern": 6,
     "attention_bias": false,
     "attention_dropout": 0.0,
     "attn_logit_softcapping": null,
@@ -21,42 +20,6 @@
     "hidden_size": 2560,
     "initializer_range": 0.02,
     "intermediate_size": 10240,
-    "layer_types": [
-      "sliding_attention",
-      "sliding_attention",
-      "sliding_attention",
-      "sliding_attention",
-      "sliding_attention",
-      "full_attention",
-      "sliding_attention",
-      "sliding_attention",
-      "sliding_attention",
-      "sliding_attention",
-      "sliding_attention",
-      "full_attention",
-      "sliding_attention",
-      "sliding_attention",
-      "sliding_attention",
-      "sliding_attention",
-      "sliding_attention",
-      "full_attention",
-      "sliding_attention",
-      "sliding_attention",
-      "sliding_attention",
-      "sliding_attention",
-      "sliding_attention",
-      "full_attention",
-      "sliding_attention",
-      "sliding_attention",
-      "sliding_attention",
-      "sliding_attention",
-      "sliding_attention",
-      "full_attention",
-      "sliding_attention",
-      "sliding_attention",
-      "sliding_attention",
-      "sliding_attention"
-    ],
     "max_position_embeddings": 131072,
     "model_type": "gemma3_text",
     "num_attention_heads": 8,
@@ -71,12 +34,13 @@
     },
     "rope_theta": 1000000.0,
     "sliding_window": 1024,
+    "sliding_window_pattern": 6,
     "torch_dtype": "bfloat16",
     "use_cache": false,
     "vocab_size": 262208
   },
   "torch_dtype": "bfloat16",
-  "transformers_version": "4.53.2",
+  "transformers_version": "4.52.4",
   "use_cache": true,
   "vision_config": {
     "attention_dropout": 0.0,
generation_config.json CHANGED
@@ -9,5 +9,5 @@
   "pad_token_id": 0,
   "top_k": 64,
   "top_p": 0.95,
-  "transformers_version": "4.53.2"
+  "transformers_version": "4.52.4"
 }
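With both files now pinned to 4.52.4, the checkpoint should parse under a 4.52.x install that still reads the legacy key. A minimal loading sketch; the repo id `drwlf/Medra4b` is a placeholder assumption, not taken from this commit:

```python
# Sketch: confirm the downgraded config parses under transformers 4.52.x.
# The repo id is a hypothetical placeholder; substitute the real model id.
from transformers import AutoConfig

config = AutoConfig.from_pretrained("drwlf/Medra4b")  # hypothetical id
print(config.model_type)                         # "gemma3"
print(config.text_config.sliding_window)         # 1024
print(config.text_config.sliding_window_pattern) # 6
```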
model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a9c7183114114912e06e38aaa184a337fc555f83431abefc3f837b04cad3c909
+oid sha256:b1c1828ec0fde3f837e7e4207b4b1b2620dd14d714a7613b4ac4ef6fc498b8cb
 size 4961251752
model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0f896465b4e27914d3cb2c8f01dbf6618b172f290cc3f907ad61cb990f6f9c5e
+oid sha256:ae620073dc4b05f4aa65611ca790c0ed5e9b19655f615e768ad723fbd9f52e31
 size 4981531360
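Both weight shards get new LFS object ids while keeping byte-identical sizes, consistent with in-place weight edits: abliteration changes values, not tensor shapes. A small sketch for verifying a local download against the oids recorded in this commit:

```python
# Sketch: check locally downloaded shards against the new LFS sha256 oids.
import hashlib

EXPECTED = {
    "model-00001-of-00002.safetensors":
        "b1c1828ec0fde3f837e7e4207b4b1b2620dd14d714a7613b4ac4ef6fc498b8cb",
    "model-00002-of-00002.safetensors":
        "ae620073dc4b05f4aa65611ca790c0ed5e9b19655f615e768ad723fbd9f52e31",
}

def sha256_of(path: str, chunk_size: int = 1 << 20) -> str:
    # Stream the file in 1 MiB chunks to avoid loading ~5 GB into memory.
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            h.update(chunk)
    return h.hexdigest()

for name, oid in EXPECTED.items():
    status = "OK" if sha256_of(name) == oid else "MISMATCH"
    print(f"{name}: {status}")
```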
model.safetensors.index.json CHANGED
@@ -1,6 +1,5 @@
 {
   "metadata": {
-    "total_parameters": 4971331952,
     "total_size": 9942663904
   },
   "weight_map": {