# lllyasviel
# i
# 9360743
import torch
import accelerate.accelerator
from diffusers.models.normalization import RMSNorm, LayerNorm, FP32LayerNorm, AdaLayerNormContinuous
# Replace accelerate's `convert_outputs_to_fp32` with an identity function:
# whatever is passed in is handed back unchanged.
# NOTE(review): presumably this stops accelerate from upcasting model outputs
# to float32 -- confirm against the installed accelerate version.
accelerate.accelerator.convert_outputs_to_fp32 = lambda x: x
def LayerNorm_forward(self, x):
    """Layer-normalise ``x`` and hand the result back matching ``x``.

    Reads ``normalized_shape``, ``weight``, ``bias`` and ``eps`` from the
    module. The trailing ``.to(x)`` converts the normalised tensor to the
    dtype and device of the input tensor.

    Args:
        self: Module instance providing the normalisation attributes.
        x: Tensor to normalise.

    Returns:
        The normalised tensor, converted with ``.to(x)``.
    """
    normed = torch.nn.functional.layer_norm(
        input=x,
        normalized_shape=self.normalized_shape,
        weight=self.weight,
        bias=self.bias,
        eps=self.eps,
    )
    # Tensor.to(other) matches dtype/device of `other`.
    return normed.to(x)
# Install the replacement forward on the LayerNorm name imported from
# diffusers and on torch.nn.LayerNorm, so instances of both classes call
# LayerNorm_forward from this point on.
LayerNorm.forward = LayerNorm_forward
torch.nn.LayerNorm.forward = LayerNorm_forward
def FP32LayerNorm_forward(self, x):
    """Run layer normalisation in float32 and return the input's dtype.

    The input, and the module's ``weight`` / ``bias`` when they are not
    ``None``, are upcast with ``.float()`` before the normalisation; the
    result is cast back to the dtype ``x`` had on entry.

    Args:
        self: Module instance providing ``normalized_shape``, ``weight``,
            ``bias`` and ``eps``.
        x: Tensor to normalise.

    Returns:
        The normalised tensor in ``x``'s original dtype.
    """
    # Either affine parameter may be absent; pass None straight through.
    weight_fp32 = None if self.weight is None else self.weight.float()
    bias_fp32 = None if self.bias is None else self.bias.float()

    normed = torch.nn.functional.layer_norm(
        x.float(), self.normalized_shape, weight_fp32, bias_fp32, self.eps
    )
    return normed.to(x.dtype)
# Install the replacement forward on the FP32LayerNorm class imported from
# diffusers; its instances call FP32LayerNorm_forward from this point on.
FP32LayerNorm.forward = FP32LayerNorm_forward
def RMSNorm_forward(self, hidden_states):
    """Apply RMS normalisation over the last dimension, keeping the input dtype.

    The mean of squares is computed on a float32 copy of the input; the
    normalised tensor is then cast back to the dtype ``hidden_states`` had on
    entry. When the module has a ``weight`` it is cast to that dtype and
    multiplied in. When the module additionally exposes a non-``None``
    ``bias`` attribute, it is cast to the same dtype and added after the
    scaling, so a learned bias is no longer silently dropped. Modules without
    a ``bias`` attribute behave exactly as before.

    Args:
        self: Module instance providing ``eps`` and ``weight`` (may be
            ``None``), and optionally ``bias``.
        hidden_states: Tensor to normalise along its last dimension.

    Returns:
        The normalised (and, if configured, scaled and shifted) tensor in the
        input's dtype.
    """
    input_dtype = hidden_states.dtype

    # Accumulate the mean of squares in float32 for numerical stability.
    variance = hidden_states.to(torch.float32).pow(2).mean(-1, keepdim=True)
    hidden_states = (hidden_states * torch.rsqrt(variance + self.eps)).to(input_dtype)

    if self.weight is None:
        return hidden_states

    hidden_states = hidden_states * self.weight.to(input_dtype)

    # `bias` is optional on the module; getattr keeps bias-less modules working.
    bias = getattr(self, "bias", None)
    if bias is not None:
        hidden_states = hidden_states + bias.to(input_dtype)
    return hidden_states
# Install the replacement forward on the RMSNorm class imported from
# diffusers; its instances call RMSNorm_forward from this point on.
RMSNorm.forward = RMSNorm_forward
def AdaLayerNormContinuous_forward(self, x, conditioning_embedding):
    """Normalise ``x`` and modulate it with a conditioning-derived scale/shift.

    The conditioning embedding goes through ``self.silu`` then ``self.linear``;
    the projection is split in two halves along dim 1 (scale first, shift
    second). Each half gets a new axis inserted at position 1 so it broadcasts
    against ``self.norm(x)``.

    Args:
        self: Module instance providing ``silu``, ``linear`` and ``norm``.
        x: Tensor to normalise and modulate.
            NOTE(review): presumably (batch, seq, dim) -- confirm with callers.
        conditioning_embedding: Conditioning tensor fed to the projection.
            NOTE(review): presumably (batch, cond_dim) -- confirm with callers.

    Returns:
        ``norm(x) * (1 + scale) + shift`` with scale/shift broadcast over the
        inserted axis.
    """
    modulation = self.linear(self.silu(conditioning_embedding))
    scale, shift = torch.chunk(modulation, 2, dim=1)
    normed = self.norm(x)
    # unsqueeze(1) inserts the broadcast axis between dim 0 and dim 1.
    return normed * (1 + scale).unsqueeze(1) + shift.unsqueeze(1)
# Install the replacement forward on the AdaLayerNormContinuous class imported
# from diffusers; its instances call AdaLayerNormContinuous_forward from this
# point on.
AdaLayerNormContinuous.forward = AdaLayerNormContinuous_forward