import torch
import torch.nn as nn
from typing import Dict

from .model import BitTransformerLM


def expand_model(model: BitTransformerLM, new_params: Dict) -> BitTransformerLM:
    """Return a new model with updated params and copied weights."""
    new_model = BitTransformerLM(**new_params)
    new_state = new_model.state_dict()
    old_state = model.state_dict()

    # Copy parameters shared with the old model over the overlapping region
    # of the old and new shapes.
    for k, v in old_state.items():
        if k in new_state:
            dest = new_state[k]
            slices = tuple(slice(0, min(d, s)) for d, s in zip(dest.shape, v.shape))
            dest[slices].copy_(v[slices])
            if dest.shape != v.shape:
                # Newly grown regions: zero for biases, small noise for weights.
                mask = torch.ones_like(dest, dtype=torch.bool)
                mask[slices] = False
                if "bias" in k:
                    dest[mask] = 0.0
                else:
                    dest[mask] = 0.001 * torch.randn_like(dest[mask])

    # Parameters that exist only in the expanded model get a fresh init:
    # zero for biases and vectors, small normal noise for weight matrices.
    for k, v in new_state.items():
        if k not in old_state:
            if "bias" in k:
                v.zero_()
            elif v.dim() > 1:
                nn.init.normal_(v, mean=0.0, std=1e-3)
            else:
                v.zero_()

    new_model.load_state_dict(new_state)
    return new_model
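

# Hypothetical usage sketch (not part of the original module): the constructor
# keyword arguments below are assumptions about BitTransformerLM's signature,
# shown only to illustrate how expand_model might be used to widen and deepen
# a trained model while carrying its weights over.
if __name__ == "__main__":
    small = BitTransformerLM(d_model=64, nhead=4, num_layers=2)  # assumed kwargs
    large = expand_model(small, {"d_model": 128, "nhead": 4, "num_layers": 4})
    print(sum(p.numel() for p in large.parameters()))  # expanded parameter count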