Update README.md
Browse files
README.md
CHANGED
@@ -100,7 +100,7 @@ with torch.no_grad():
|
|
100 |
# Representations: 13 layers (CNN feature extractor + 12 Transformer)
|
101 |
# NOTE: each layer performs differently in different downstream tasks - you should choose empirically
|
102 |
all_layer_hidden_states = torch.stack(outputs.hidden_states).squeeze()
|
103 |
-
print(all_layer_hidden_states.shape) # [13
|
104 |
|
105 |
# For utterance-level classification tasks, you can simply reduce the representation in time
|
106 |
time_reduced_hidden_states = all_layer_hidden_states.mean(-2)
|
|
|
100 |
# Representations: 13 layers (CNN feature extractor + 12 Transformer)
|
101 |
# NOTE: each layer performs differently in different downstream tasks - you should choose empirically
|
102 |
all_layer_hidden_states = torch.stack(outputs.hidden_states).squeeze()
|
103 |
+
print(all_layer_hidden_states.shape) # [13 layers, Time steps, 768 feature_dim]
|
104 |
|
105 |
# For utterance-level classification tasks, you can simply reduce the representation in time
|
106 |
time_reduced_hidden_states = all_layer_hidden_states.mean(-2)
|