# Make the repository root importable so `bit_transformer` resolves when this
# test file is executed directly (i.e. without the package being installed).
import os
import sys

sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))

import torch

from bit_transformer.model import BitTransformerLM
def test_act_halting():
    """ACT halting fires: with a low threshold, at least one layer halts early.

    Builds a tiny BitTransformerLM with Adaptive Computation Time enabled
    (low ``act_threshold`` so halting triggers easily), runs one forward pass
    on a random bit sequence, and checks that not every layer kept its halt
    probability strictly below 1 — i.e. the ACT mechanism actually halted at
    least one layer.
    """
    # Fixed seed: both the model's random init and the random input otherwise
    # make this assertion nondeterministic (flaky across runs).
    torch.manual_seed(0)
    model = BitTransformerLM(
        d_model=16,
        nhead=2,
        num_layers=3,
        dim_feedforward=32,
        max_seq_len=8,
        use_act=True,
        act_threshold=0.1,
    )
    bits = torch.randint(0, 2, (1, 8), dtype=torch.long)
    _, telemetry = model(bits)
    # telemetry["halt_probs"] is a list of per-layer tensors; indexing [:, 0, 0]
    # assumes each entry is (batch, seq)-shaped — take batch 0, position 0 of
    # every layer (TODO confirm shape against BitTransformerLM telemetry).
    halt_probs = torch.stack(telemetry["halt_probs"])[:, 0, 0]
    # Fewer than num_layers entries below 1 means at least one layer reached a
    # halt probability of 1, i.e. halting occurred.
    assert (halt_probs < 1).sum().item() < model.num_layers