nguyenthanhasia committed
Commit 7b5ea8f (verified) · 1 parent: cc3319c

Upload configuration_paraformer.py with huggingface_hub

Files changed (1):
  1. configuration_paraformer.py +78 -0
configuration_paraformer.py ADDED
@@ -0,0 +1,78 @@
+ """
+ Paraformer model configuration
+ """
+
+ from transformers.configuration_utils import PretrainedConfig
+ from transformers.utils import logging
+
+ logger = logging.get_logger(__name__)
+
+ PARAFORMER_PRETRAINED_CONFIG_ARCHIVE_MAP = {}
+
+
+ class ParaformerConfig(PretrainedConfig):
+     """
+     This is the configuration class to store the configuration of a [`ParaformerModel`]. It is used to instantiate a
+     Paraformer model according to the specified arguments, defining the model architecture. Instantiating a
+     configuration with the defaults will yield a configuration similar to that of the Paraformer
+     [nguyenthanhasia/paraformer](https://github.com/nguyenthanhasia/paraformer) architecture.
+
+     Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
+     documentation from [`PretrainedConfig`] for more information.
+
+     Args:
+         base_model_name (`str`, *optional*, defaults to `"paraphrase-mpnet-base-v2"`):
+             The name of the base SentenceTransformer model to use for encoding.
+         hidden_size (`int`, *optional*, defaults to 768):
+             Dimensionality of the encoder layers and the pooler layer.
+         num_labels (`int`, *optional*, defaults to 2):
+             Number of labels for classification (binary classification by default).
+         attention_type (`str`, *optional*, defaults to `"general"`):
+             Type of attention mechanism to use. Can be `"dot"` or `"general"`.
+         use_sparsemax (`bool`, *optional*, defaults to `True`):
+             Whether to use sparsemax instead of softmax for attention weights.
+         dropout_prob (`float`, *optional*, defaults to 0.1):
+             The dropout probability for all fully connected layers in the embeddings, encoder, and pooler.
+         initializer_range (`float`, *optional*, defaults to 0.02):
+             The standard deviation of the truncated normal initializer for initializing all weight matrices.
+
+     Example:
+
+     ```python
+     >>> from configuration_paraformer import ParaformerConfig
+     >>> from modeling_paraformer import ParaformerModel
+
+     >>> # Initializing a Paraformer configuration
+     >>> configuration = ParaformerConfig()
+
+     >>> # Initializing a model from the configuration
+     >>> model = ParaformerModel(configuration)
+
+     >>> # Accessing the model configuration
+     >>> configuration = model.config
+     ```
+     """
+
+     model_type = "paraformer"
+
+     def __init__(
+         self,
+         base_model_name="paraphrase-mpnet-base-v2",
+         hidden_size=768,
+         num_labels=2,
+         attention_type="general",
+         use_sparsemax=True,
+         dropout_prob=0.1,
+         initializer_range=0.02,
+         **kwargs
+     ):
+         super().__init__(**kwargs)
+
+         self.base_model_name = base_model_name
+         self.hidden_size = hidden_size
+         self.num_labels = num_labels
+         self.attention_type = attention_type
+         self.use_sparsemax = use_sparsemax
+         self.dropout_prob = dropout_prob
+         self.initializer_range = initializer_range
+
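For reference, a minimal sketch of how the uploaded configuration can be exercised once the file is importable. The save/load round trip and `AutoConfig.register` call follow the standard `transformers` custom-model pattern; the registration step is illustrative and is not performed by this commit, and the output directory name is arbitrary.

```python
# Minimal usage sketch for ParaformerConfig (assumes configuration_paraformer.py
# from this commit is on the import path). The AutoConfig registration is the
# standard transformers pattern for custom configs, not part of this commit.
from transformers import AutoConfig

from configuration_paraformer import ParaformerConfig

# Instantiate with non-default hyperparameters.
config = ParaformerConfig(
    num_labels=3,            # hypothetical three-way classification head
    attention_type="dot",    # "dot" or "general", per the docstring
    use_sparsemax=False,     # fall back to ordinary softmax attention
)

# Serialization comes from PretrainedConfig: writes config.json to the directory.
config.save_pretrained("./paraformer-config")
reloaded = ParaformerConfig.from_pretrained("./paraformer-config")
assert reloaded.attention_type == "dot"

# Optional: let AutoConfig resolve model_type="paraformer" to this class.
AutoConfig.register("paraformer", ParaformerConfig)
auto_config = AutoConfig.from_pretrained("./paraformer-config")
print(type(auto_config).__name__)  # ParaformerConfig
```

The `model_type = "paraformer"` class attribute is what makes the `AutoConfig` lookup work: it is written into `config.json` on save and used to resolve the registered class on load.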