return view(-1, 1) (#5)
Browse files
- return view(-1, 1) (4c3fd5f336e9f80299309a2ef125310b50704348)
- modelling_deepseek.py +1 -1
modelling_deepseek.py
CHANGED
@@ -341,7 +341,7 @@ class DeepseekMoE(nn.Module):
|
|
341 |
y = y.view(*orig_shape)
|
342 |
y = AddAuxiliaryLoss.apply(y, aux_loss)
|
343 |
else:
|
344 |
-
y = self.moe_infer(hidden_states, flat_topk_idx, topk_weight
|
345 |
if self.config.n_shared_experts is not None:
|
346 |
y = y + self.shared_experts(identity)
|
347 |
return y
|
|
|
341 |
y = y.view(*orig_shape)
|
342 |
y = AddAuxiliaryLoss.apply(y, aux_loss)
|
343 |
else:
|
344 |
+
y = self.moe_infer(hidden_states, flat_topk_idx, topk_weight.view(-1, 1)).view(*orig_shape)
|
345 |
if self.config.n_shared_experts is not None:
|
346 |
y = y + self.shared_experts(identity)
|
347 |
return y
|