import torch.nn as nn


class HeadProjectorResidual(nn.Module):
    """Linear projection head: GELU MLP with a residual connection and LayerNorm."""

    def __init__(
        self,
        input_embedding_dim: int = 1000,
        output_embedding_dim: int = 512,
        dropout: float = 0.4,
    ):
        super().__init__()
        self.projection = nn.Linear(input_embedding_dim, output_embedding_dim)
        self.gelu = nn.GELU()
        self.fc = nn.Linear(output_embedding_dim, output_embedding_dim)
        self.dropout = nn.Dropout(dropout)
        self.layer_norm = nn.LayerNorm(output_embedding_dim)

    def forward(self, x):
        # Project the input to the output dimension once, then reuse that
        # tensor as the residual branch around the GELU -> Linear -> Dropout block.
        projected = self.projection(x)
        x = self.gelu(projected)
        x = self.fc(x)
        x = self.dropout(x)
        x = x + projected  # residual connection
        x = self.layer_norm(x)
        return x
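
# A minimal usage sketch, not from the original source: the batch size,
# eval() call, and random inputs below are illustrative assumptions.
import torch

projector = HeadProjectorResidual(input_embedding_dim=1000, output_embedding_dim=512)
projector.eval()  # disable dropout for deterministic inference

embeddings = torch.randn(8, 1000)  # hypothetical batch of 8 input embeddings
with torch.no_grad():
    projected = projector(embeddings)

print(projected.shape)  # torch.Size([8, 512])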