from transformers import PretrainedConfig


class SpecT1Config(PretrainedConfig):
    """Configuration class for a SpecT1 model, storing its architecture hyperparameters."""

    model_type = "spect1"

    def __init__(
        self,
        hidden_size=4096,
        intermediate_size=11008,
        num_attention_heads=32,
        num_hidden_layers=36,
        max_position_embeddings=32768,
        vocab_size=151680,
        attention_dropout=0.0,
        hidden_act="silu",
        max_window_layers=36,
        rope_theta=640000,
        sliding_window=32768,
        attention_bias=True,
        num_nextn_predict_layers=1,
        initializer_range=0.02,
        bos_token_id=151643,
        eos_token_id=151645,
        tie_word_embeddings=False,
        **kwargs,
    ):
        # Core transformer dimensions.
        self.hidden_size = hidden_size
        self.intermediate_size = intermediate_size
        self.num_attention_heads = num_attention_heads
        self.num_hidden_layers = num_hidden_layers
        self.vocab_size = vocab_size

        # Attention, activation, and positional-encoding settings.
        self.max_position_embeddings = max_position_embeddings
        self.attention_dropout = attention_dropout
        self.hidden_act = hidden_act
        self.max_window_layers = max_window_layers
        self.rope_theta = rope_theta
        self.sliding_window = sliding_window
        self.attention_bias = attention_bias

        # Number of additional next-n token prediction layers.
        self.num_nextn_predict_layers = num_nextn_predict_layers

        # Standard deviation used for weight initialization.
        self.initializer_range = initializer_range

        # Token ids and embedding tying are handled by PretrainedConfig, so
        # pass them through to the parent constructor instead of assigning
        # them directly on self.
        super().__init__(
            bos_token_id=bos_token_id,
            eos_token_id=eos_token_id,
            tie_word_embeddings=tie_word_embeddings,
            **kwargs,
        )
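

# --- Usage sketch (illustrative, not part of the config definition) ---
# A minimal example assuming only the class above and the public
# transformers config API (AutoConfig.register, save_pretrained /
# from_pretrained). The directory path below is hypothetical.
if __name__ == "__main__":
    from transformers import AutoConfig

    # Register the custom config so AutoConfig can resolve
    # model_type == "spect1" when loading checkpoints that reference it
    # (optional when constructing the config class directly).
    AutoConfig.register("spect1", SpecT1Config)

    # Instantiate with defaults, overriding a couple of fields.
    config = SpecT1Config(num_hidden_layers=24, sliding_window=8192)

    # Round-trip through JSON, the usual persistence path for configs.
    config.save_pretrained("./spect1-config")
    reloaded = SpecT1Config.from_pretrained("./spect1-config")
    assert reloaded.num_hidden_layers == 24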