from typing import Dict

import torch
import torch.nn as nn

from .model import BitTransformerLM


def _can_sample_normal(tensor: torch.Tensor) -> bool:
    """Return True when *tensor*'s dtype can hold normally distributed samples."""
    # randn_like / normal_ raise on integer and bool dtypes.
    return tensor.is_floating_point() or tensor.is_complex()


def _init_fresh(name: str, tensor: torch.Tensor) -> None:
    """Initialise, in place, a tensor that receives no values from the old model."""
    # NOTE(review): every 1-D non-bias tensor is zeroed here (this would include
    # normalisation scale vectors, if the model has any) -- confirm intended.
    if "bias" not in name and tensor.dim() > 1 and _can_sample_normal(tensor):
        nn.init.normal_(tensor, mean=0.0, std=1e-3)
    else:
        tensor.zero_()


def _copy_overlap(name: str, dest: torch.Tensor, src: torch.Tensor) -> None:
    """Copy the overlapping corner of *src* into *dest* and fill any grown region.

    Both tensors must have the same number of dimensions.
    """
    overlap = tuple(slice(0, min(d, s)) for d, s in zip(dest.shape, src.shape))
    dest[overlap].copy_(src[overlap])
    if dest.shape == src.shape:
        return

    # Everything outside the copied corner is newly added capacity.
    grown = torch.ones_like(dest, dtype=torch.bool)
    grown[overlap] = False
    if "bias" in name or not _can_sample_normal(dest):
        dest[grown] = 0.0
    else:
        dest[grown] = 0.001 * torch.randn_like(dest[grown])


def expand_model(model: BitTransformerLM, new_params: Dict) -> BitTransformerLM:
    """Return a new model built from *new_params* with weights copied from *model*.

    For every state-dict entry present in both models (with the same number of
    dimensions) the overlapping leading region is copied across. Any extra
    region in the new tensor is zero-filled for entries whose name contains
    ``"bias"`` and filled with small Gaussian noise (scale 1e-3) otherwise.

    Entries that exist only in the new model are zeroed when their name
    contains ``"bias"`` or they have at most one dimension, and drawn from
    ``N(0, 1e-3)`` otherwise.

    Tensors whose dtype cannot hold normal samples (integer/bool buffers) are
    zero-filled instead of raising, and entries whose rank differs between the
    two models are treated as having no usable source.

    Args:
        model: The existing model whose weights are reused.
        new_params: Keyword arguments forwarded to ``BitTransformerLM``.

    Returns:
        The freshly constructed model with the merged state loaded.
    """
    new_model = BitTransformerLM(**new_params)
    new_state = new_model.state_dict()
    old_state = model.state_dict()

    def has_source(key: str) -> bool:
        # A usable source exists only when both sides have the key and agree
        # on rank; otherwise zip() would silently truncate the slice tuple.
        return (
            key in old_state
            and key in new_state
            and old_state[key].dim() == new_state[key].dim()
        )

    # Pass 1: carry over existing weights (kept as a separate pass so the
    # order of random draws matches the original implementation).
    for key, src in old_state.items():
        if has_source(key):
            _copy_overlap(key, new_state[key], src)

    # Pass 2: initialise everything that received nothing from the old model.
    for key, dest in new_state.items():
        if not has_source(key):
            _init_fresh(key, dest)

    new_model.load_state_dict(new_state)
    return new_model