automatedstockminingorg committed
Commit cdb39c8 · verified · 1 Parent(s): 0546162

Update config.json

Files changed (1):
  1. config.json (+5 −35)
config.json CHANGED
@@ -1,16 +1,10 @@
 {
   "_name_or_path": "meta-llama/Llama-3.1-8B-Instruct",
-  "architectures": [
-    "LlamaForCausalLM"
-  ],
+  "architectures": ["LlamaForCausalLM"],
   "attention_bias": false,
   "attention_dropout": 0.0,
   "bos_token_id": 128000,
-  "eos_token_id": [
-    128001,
-    128008,
-    128009
-  ],
+  "eos_token_id": [128001, 128008, 128009],
   "hidden_act": "silu",
   "hidden_size": 4096,
   "initializer_range": 0.02,
@@ -28,20 +22,11 @@
     "bnb_4bit_compute_dtype": "bfloat16",
     "bnb_4bit_quant_storage": "uint8",
     "bnb_4bit_quant_type": "nf4",
-    "bnb_4bit_use_double_quant": true,
-    "llm_int8_enable_fp32_cpu_offload": false,
-    "llm_int8_has_fp16_weight": false,
-    "llm_int8_skip_modules": null,
-    "llm_int8_threshold": 6.0,
-    "load_in_4bit": true,
-    "load_in_8bit": false,
-    "quant_method": "bitsandbytes"
+    "bnb_4bit_use_double_quant": true
   },
   "rms_norm_eps": 1e-05,
   "rope_scaling": {
     "factor": 8.0,
-    "high_freq_factor": 4.0,
-    "low_freq_factor": 1.0,
     "original_max_position_embeddings": 8192,
     "rope_type": "llama3"
   },
@@ -53,31 +38,16 @@
   "vocab_size": 128256,
   "serverless": {
     "enabled": true,
-    "max_batch_size": 128,
+    "max_batch_size": 64, // Lowered batch size for better compatibility
     "max_sequence_length": 2048,
     "model_parallelism": {
       "enabled": true,
       "num_gpus": 1,
-      "num_shards": 1
+      "num_shards": 2 // Adjusted for distributed inference
     },
     "quantization": {
       "enabled": true,
       "target_dtype": "int8"
-
     }
   }
 }
-  "serverless": {
-    "enabled": true,
-    "max_batch_size": 128,
-    "max_sequence_length": 2048,
-    "model_parallelism": {
-      "enabled": true,
-      "num_gpus": 1,
-      "num_shards": 1
-    },
-    "quantization": {
-      "enabled": true,
-      "target_dtype": "int8"
-    }
-  }
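
A note on the two added lines that carry `//` comments: strict JSON has no comment syntax, so with this commit as-is, any standard parser will reject config.json. A minimal sanity check, assuming a local checkout of this repo (the path is illustrative):

```python
import json

# Path assumes a local checkout of this repo.
with open("config.json") as f:
    raw = f.read()

try:
    json.loads(raw)
    print("config.json parses as strict JSON")
except json.JSONDecodeError as err:
    # With the "//" comments from this commit, parsing fails here.
    print(f"invalid JSON at line {err.lineno}, col {err.colno}: {err.msg}")
```

Moving the rationale into the commit message instead of the file would keep config.json parseable.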
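Separately, the trimmed `quantization_config` block now lacks `quant_method` and `load_in_4bit`; as far as I can tell, transformers uses `quant_method` to recognize the block as a bitsandbytes config, so `from_pretrained` may no longer apply 4-bit quantization automatically. A sketch of passing the surviving settings explicitly instead; the model id comes from `_name_or_path`, and this workflow is an assumption, not something stated in the commit:

```python
import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

# Rebuild the 4-bit settings explicitly rather than relying on the
# trimmed quantization_config block in this file.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,                      # key removed by this commit
    bnb_4bit_quant_type="nf4",              # kept in the file
    bnb_4bit_compute_dtype=torch.bfloat16,  # kept in the file
    bnb_4bit_use_double_quant=True,         # kept in the file
)

model = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Llama-3.1-8B-Instruct",  # from "_name_or_path"
    quantization_config=bnb_config,
    device_map="auto",
)
```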
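Finally, keeping `"rope_type": "llama3"` while deleting `high_freq_factor` and `low_freq_factor` looks risky: in my reading, recent transformers releases validate llama3 rope scaling and raise when either factor is missing. A sketch that re-attaches the deleted values before building the config (assumes the `//` comments above have already been stripped so the file parses):

```python
import json
from transformers import LlamaConfig

# Assumes a local checkout with the "//" comments already stripped,
# since json.load rejects them (see the check above).
with open("config.json") as f:
    raw = json.load(f)

# Re-attach the two values this commit deleted; transformers' llama3
# rope validation expects both factors alongside "factor" and
# "original_max_position_embeddings".
raw["rope_scaling"]["low_freq_factor"] = 1.0
raw["rope_scaling"]["high_freq_factor"] = 4.0

config = LlamaConfig.from_dict(raw)
```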