ayjays132 committed on
Commit
24fc508
·
verified ·
1 Parent(s): 2ce5954

Update config.json

Browse files
Files changed (1) hide show
  1. config.json +155 -149
config.json CHANGED
@@ -1,149 +1,155 @@
1
- {
2
- "_name_or_path": "ayjays132/CustomGPT2Conversational",
3
- "activation_function": "gelu_new",
4
- "advanced_model_options": {
5
- "contextual_embeddings": {
6
- "approaches": [
7
- "contextual_attention_mechanisms",
8
- "semantic_embedding_regularization"
9
- ],
10
- "enable": true
11
- },
12
- "dynamic_adaptation": {
13
- "enable": true,
14
- "techniques": [
15
- "adaptive_layer_dropping",
16
- "dynamic_context_window"
17
- ]
18
- },
19
- "innovative_neuron_growth": {
20
- "enable": true,
21
- "strategies": [
22
- "selective_neuron_pruning",
23
- "progressive_neuron_expansion"
24
- ]
25
- },
26
- "memory_optimization": {
27
- "enable": true,
28
- "methods": [
29
- "gradient_checkpointing",
30
- "memory-efficient_attention"
31
- ]
32
- },
33
- "meta_learning": {
34
- "approaches": [
35
- "meta_learning_rate_adjustment",
36
- "online_adaptation"
37
- ],
38
- "enable": true
39
- },
40
- "secret_advanced_options": {
41
- "adaptive_token_embedding": {
42
- "enable": true,
43
- "strategies": [
44
- "dynamic_embedding_resizing",
45
- "contextual_embedding_scaling"
46
- ]
47
- },
48
- "future_context_prediction": {
49
- "enable": true,
50
- "techniques": [
51
- "lookahead_context_integration",
52
- "predictive_attention_mechanisms"
53
- ]
54
- },
55
- "multi_modal_integration": {
56
- "enable": true,
57
- "methods": [
58
- "text_image_alignment",
59
- "cross_modal_attention"
60
- ]
61
- }
62
- }
63
- },
64
- "architectures": [
65
- "GPT2LMHeadModel"
66
- ],
67
- "attn_pdrop": 0.1,
68
- "bos_token_id": 50256,
69
- "context_window": 20,
70
- "contextual_embedding_dim": 1024,
71
- "device": "cuda",
72
- "dropout_rate": 0.1,
73
- "embd_pdrop": 0.1,
74
- "embedding_dim": 1024,
75
- "eos_token_id": 50256,
76
- "hidden_dim": 1024,
77
- "initializer_range": 0.02,
78
- "innovative_growth_capacity": 50000,
79
- "integration_settings": {
80
- "config_name": "config.json",
81
- "load_from_transformers": true,
82
- "pytorch_dump_folder_path": "./model_save",
83
- "pytorch_model_bin_name": "pytorch_model.bin"
84
- },
85
- "layer_norm_epsilon": 1e-05,
86
- "max_memory_size": 100000,
87
- "max_neurons": 100,
88
- "meta_learning_rate": 0.001,
89
- "model_type": "gpt2",
90
- "n_ctx": 1024,
91
- "n_embd": 1024,
92
- "n_head": 16,
93
- "n_inner": null,
94
- "n_layer": 24,
95
- "n_positions": 1024,
96
- "num_embeddings": 50268,
97
- "num_heads": 64,
98
- "num_layers": 24,
99
- "output_attentions": true,
100
- "output_hidden_states": true,
101
- "pad_token_id": 50256,
102
- "reorder_and_upcast_attn": false,
103
- "resid_pdrop": 0.1,
104
- "scale_attn_by_inverse_layer_idx": false,
105
- "scale_attn_weights": true,
106
- "sep_token_id": -1,
107
- "special_tokens": {
108
- "additional_special_tokens": [
109
- "<greeting>",
110
- "<farewell>",
111
- "<thank>",
112
- "<apology>"
113
- ],
114
- "bos_token": "<bos>",
115
- "cls_token": "<cls>",
116
- "eos_token": "<eos>",
117
- "mask_token": "<mask>",
118
- "pad_token": "<pad>",
119
- "sep_token": "<sep>",
120
- "unk_token": "<unk>"
121
- },
122
- "state_shape": null,
123
- "summary_activation": null,
124
- "summary_first_dropout": 0.1,
125
- "summary_proj_to_labels": true,
126
- "summary_type": "cls_index",
127
- "summary_use_proj": true,
128
- "target_q_model": null,
129
- "task_specific_params": {
130
- "text-generation": {
131
- "do_sample": true,
132
- "early_stopping": true,
133
- "length_penalty": 1.0,
134
- "max_length": 2048,
135
- "min_length": 64,
136
- "no_repeat_ngram_size": 2,
137
- "num_beams": 8,
138
- "num_return_sequences": 3,
139
- "repetition_penalty": 1.2,
140
- "temperature": 0.9,
141
- "top_k": 50,
142
- "top_p": 0.95
143
- }
144
- },
145
- "torch_dtype": "float32",
146
- "transformers_version": "4.28.0.dev0",
147
- "use_cache": true,
148
- "vocab_size": 50257
149
- }
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "ayjays132/CustomGPT2Conversational",
3
+ "activation_function": "gelu_new",
4
+ "advanced_model_options": {
5
+ "contextual_embeddings": {
6
+ "approaches": [
7
+ "contextual_attention_mechanisms",
8
+ "semantic_embedding_regularization"
9
+ ],
10
+ "enable": true
11
+ },
12
+ "dynamic_adaptation": {
13
+ "enable": true,
14
+ "techniques": [
15
+ "adaptive_layer_dropping",
16
+ "dynamic_context_window"
17
+ ]
18
+ },
19
+ "innovative_neuron_growth": {
20
+ "enable": true,
21
+ "strategies": [
22
+ "selective_neuron_pruning",
23
+ "progressive_neuron_expansion"
24
+ ]
25
+ },
26
+ "memory_optimization": {
27
+ "enable": true,
28
+ "methods": [
29
+ "gradient_checkpointing",
30
+ "memory-efficient_attention"
31
+ ]
32
+ },
33
+ "meta_learning": {
34
+ "approaches": [
35
+ "meta_learning_rate_adjustment",
36
+ "online_adaptation"
37
+ ],
38
+ "enable": true
39
+ },
40
+ "secret_advanced_options": {
41
+ "adaptive_token_embedding": {
42
+ "enable": true,
43
+ "strategies": [
44
+ "dynamic_embedding_resizing",
45
+ "contextual_embedding_scaling"
46
+ ]
47
+ },
48
+ "future_context_prediction": {
49
+ "enable": true,
50
+ "techniques": [
51
+ "lookahead_context_integration",
52
+ "predictive_attention_mechanisms"
53
+ ]
54
+ },
55
+ "multi_modal_integration": {
56
+ "enable": true,
57
+ "methods": [
58
+ "text_image_alignment",
59
+ "cross_modal_attention"
60
+ ]
61
+ }
62
+ }
63
+ },
64
+ "architectures": [
65
+ "GPT2LMHeadModel"
66
+ ],
67
+ "max_length": 512,
68
+ "min_length": 50,
69
+ "num_beams": 5,
70
+ "length_penalty": 1.0,
71
+ "no_repeat_ngram_size": 2,
72
+ "early_stopping": true,
73
+ "attn_pdrop": 0.1,
74
+ "bos_token_id": 50256,
75
+ "context_window": 20,
76
+ "contextual_embedding_dim": 1024,
77
+ "device": "cuda",
78
+ "dropout_rate": 0.1,
79
+ "embd_pdrop": 0.1,
80
+ "embedding_dim": 1024,
81
+ "eos_token_id": 50256,
82
+ "hidden_dim": 1024,
83
+ "initializer_range": 0.02,
84
+ "innovative_growth_capacity": 50000,
85
+ "integration_settings": {
86
+ "config_name": "config.json",
87
+ "load_from_transformers": true,
88
+ "pytorch_dump_folder_path": "./model_save",
89
+ "pytorch_model_bin_name": "pytorch_model.bin"
90
+ },
91
+ "layer_norm_epsilon": 1e-05,
92
+ "max_memory_size": 100000,
93
+ "max_neurons": 100,
94
+ "meta_learning_rate": 0.001,
95
+ "model_type": "gpt2",
96
+ "n_ctx": 1024,
97
+ "n_embd": 1024,
98
+ "n_head": 16,
99
+ "n_inner": null,
100
+ "n_layer": 24,
101
+ "n_positions": 1024,
102
+ "num_embeddings": 50268,
103
+ "num_heads": 64,
104
+ "num_layers": 24,
105
+ "output_attentions": true,
106
+ "output_hidden_states": true,
107
+ "pad_token_id": 50256,
108
+ "reorder_and_upcast_attn": false,
109
+ "resid_pdrop": 0.1,
110
+ "scale_attn_by_inverse_layer_idx": false,
111
+ "scale_attn_weights": true,
112
+ "sep_token_id": -1,
113
+ "special_tokens": {
114
+ "additional_special_tokens": [
115
+ "<greeting>",
116
+ "<farewell>",
117
+ "<thank>",
118
+ "<apology>"
119
+ ],
120
+ "bos_token": "<bos>",
121
+ "cls_token": "<cls>",
122
+ "eos_token": "<eos>",
123
+ "mask_token": "<mask>",
124
+ "pad_token": "<pad>",
125
+ "sep_token": "<sep>",
126
+ "unk_token": "<unk>"
127
+ },
128
+ "state_shape": null,
129
+ "summary_activation": null,
130
+ "summary_first_dropout": 0.1,
131
+ "summary_proj_to_labels": true,
132
+ "summary_type": "cls_index",
133
+ "summary_use_proj": true,
134
+ "target_q_model": null,
135
+ "task_specific_params": {
136
+ "text-generation": {
137
+ "do_sample": true,
138
+ "early_stopping": true,
139
+ "length_penalty": 1.0,
140
+ "max_length": 2048,
141
+ "min_length": 64,
142
+ "no_repeat_ngram_size": 2,
143
+ "num_beams": 8,
144
+ "num_return_sequences": 3,
145
+ "repetition_penalty": 1.2,
146
+ "temperature": 0.9,
147
+ "top_k": 50,
148
+ "top_p": 0.95
149
+ }
150
+ },
151
+ "torch_dtype": "float32",
152
+ "transformers_version": "4.28.0.dev0",
153
+ "use_cache": true,
154
+ "vocab_size": 50257
155
+ }