nguyenthanhasia
/

paraformer

+"""
+Paraformer model configuration
+"""
+from transformers.configuration_utils import PretrainedConfig
+from transformers.utils import logging
+logger = logging.get_logger(__name__)  # module-level logger from the Transformers logging utilities, named after this module
+PARAFORMER_PRETRAINED_CONFIG_ARCHIVE_MAP = {}  # empty here; NOTE(review): presumably maps checkpoint names to hosted config files - confirm
+class ParaformerConfig(PretrainedConfig):
+    """
+    This is the configuration class to store the configuration of a [`ParaformerModel`]. It is used to instantiate a
+    Paraformer model according to the specified arguments, defining the model architecture. Instantiating a
+    configuration with the defaults will yield a similar configuration to that of the Paraformer
+    [nguyenthanhasia/paraformer](https://github.com/nguyenthanhasia/paraformer) architecture.
+    Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
+    documentation from [`PretrainedConfig`] for more information.
+    Args:
+        base_model_name (`str`, *optional*, defaults to `"paraphrase-mpnet-base-v2"`):
+            The name of the base SentenceTransformer model to use for encoding.
+        hidden_size (`int`, *optional*, defaults to 768):
+            Dimensionality of the encoder layers and the pooler layer.
+        num_labels (`int`, *optional*, defaults to 2):
+            Number of labels for classification (binary classification by default).
+        attention_type (`str`, *optional*, defaults to `"general"`):
+            Type of attention mechanism to use. Can be "dot" or "general".
+        use_sparsemax (`bool`, *optional*, defaults to `True`):
+            Whether to use sparsemax instead of softmax for attention weights.
+        dropout_prob (`float`, *optional*, defaults to 0.1):
+            The dropout probability for all fully connected layers in the embeddings, encoder, and pooler.
+    Example:
+    ```python
+    >>> from configuration_paraformer import ParaformerConfig
+    >>> from modeling_paraformer import ParaformerModel
+    >>> # Initializing a Paraformer configuration
+    >>> configuration = ParaformerConfig()
+    >>> # Initializing a model from the configuration
+    >>> model = ParaformerModel(configuration)
+    >>> # Accessing the model configuration
+    >>> configuration = model.config
+    ```
+    """
+    model_type = "paraformer"
+    def __init__(
+        self,
+        base_model_name="paraphrase-mpnet-base-v2",
+        hidden_size=768,
+        num_labels=2,
+        attention_type="general",
+        use_sparsemax=True,
+        dropout_prob=0.1,
+        initializer_range=0.02,
+        **kwargs
+    ):
+        super().__init__(**kwargs)
+        self.base_model_name = base_model_name
+        self.hidden_size = hidden_size
+        self.num_labels = num_labels
+        self.attention_type = attention_type
+        self.use_sparsemax = use_sparsemax
+        self.dropout_prob = dropout_prob
+        self.initializer_range = initializer_range