""" Paraformer model configuration """ from transformers.configuration_utils import PretrainedConfig from transformers.utils import logging logger = logging.get_logger(__name__) PARAFORMER_PRETRAINED_CONFIG_ARCHIVE_MAP = {} class ParaformerConfig(PretrainedConfig): """ This is the configuration class to store the configuration of a [`ParaformerModel`]. It is used to instantiate a Paraformer model according to the specified arguments, defining the model architecture. Instantiating a configuration with the defaults will yield a similar configuration to that of the Paraformer [nguyenthanhasia/paraformer](https://github.com/nguyenthanhasia/paraformer) architecture. Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the documentation from [`PretrainedConfig`] for more information. Args: base_model_name (`str`, *optional*, defaults to `"paraphrase-mpnet-base-v2"`): The name of the base SentenceTransformer model to use for encoding. hidden_size (`int`, *optional*, defaults to 768): Dimensionality of the encoder layers and the pooler layer. num_labels (`int`, *optional*, defaults to 2): Number of labels for classification (binary classification by default). attention_type (`str`, *optional*, defaults to `"general"`): Type of attention mechanism to use. Can be "dot" or "general". use_sparsemax (`bool`, *optional*, defaults to `True`): Whether to use sparsemax instead of softmax for attention weights. dropout_prob (`float`, *optional*, defaults to 0.1): The dropout probability for all fully connected layers in the embeddings, encoder, and pooler. Example: ```python >>> from configuration_paraformer import ParaformerConfig >>> from modeling_paraformer import ParaformerModel >>> # Initializing a Paraformer configuration >>> configuration = ParaformerConfig() >>> # Initializing a model from the configuration >>> model = ParaformerModel(configuration) >>> # Accessing the model configuration >>> configuration = model.config ``` """ model_type = "paraformer" def __init__( self, base_model_name="paraphrase-mpnet-base-v2", hidden_size=768, num_labels=2, attention_type="general", use_sparsemax=True, dropout_prob=0.1, initializer_range=0.02, **kwargs ): super().__init__(**kwargs) self.base_model_name = base_model_name self.hidden_size = hidden_size self.num_labels = num_labels self.attention_type = attention_type self.use_sparsemax = use_sparsemax self.dropout_prob = dropout_prob self.initializer_range = initializer_range