|
from transformers.models.gpt2.configuration_gpt2 import GPT2Config |
|
|
|
class BackpackGPT2Config(GPT2Config):
    """
    This is the configuration class to store the configuration of a Backpack GPT-2 model. It is used to
    instantiate a Backpack GPT-2 model according to the specified arguments, defining the model architecture.

    Configuration objects inherit from [`GPT2Config`] and can be used to control the model outputs. Read the
    documentation from [`GPT2Config`] for more information.

    Args:
        vocab_size (`int`, *optional*, defaults to 50264):
            Vocabulary size, forwarded unchanged to [`GPT2Config`].
        num_senses (`int`, *optional*, defaults to 16):
            The number of sense vectors to define for each word.
        sense_intermediate_scale (`int`, *optional*, defaults to 4):
            The hidden dimensionality of the sense vector network.
        n_positions (`int`, *optional*, defaults to 512):
            Maximum number of positions, forwarded unchanged to [`GPT2Config`].
        scale_attn_by_inverse_layer_idx (`bool`, *optional*, defaults to `True`):
            Forwarded unchanged to [`GPT2Config`]; see its documentation for the exact effect.
        **kwargs:
            Any additional keyword arguments, passed through to [`GPT2Config`].

    Example:

    ```python
    >>> from transformers import BackpackGPT2Config, BackpackGPT2Model

    >>> # Initializing a Backpack GPT-2 configuration
    >>> configuration = BackpackGPT2Config()

    >>> # Initializing a model (with random weights) from the configuration
    >>> model = BackpackGPT2Model(configuration)

    >>> # Accessing the model configuration
    >>> configuration = model.config
    ```
    """

    def __init__(
        self,
        vocab_size=50264,
        num_senses=16,
        sense_intermediate_scale=4,
        n_positions=512,
        scale_attn_by_inverse_layer_idx=True,
        **kwargs,
    ):
        # Backpack-specific fields are stored on the instance before the
        # parent constructor runs, matching the original statement order.
        self.num_senses = num_senses
        self.sense_intermediate_scale = sense_intermediate_scale
        # Everything else is plain GPT-2 configuration and is delegated to
        # the parent class, using this class's own default values.
        super().__init__(
            vocab_size=vocab_size,
            n_positions=n_positions,
            scale_attn_by_inverse_layer_idx=scale_attn_by_inverse_layer_idx,
            **kwargs,
        )
|
|