{ "checkpoint": "ckpt_11.pt", "model": "GPT2-S", "training_step": 54000, "instability_type": "Divergent loss spikes", "learning_rate": "3e-4", "decay": "0.0", "warm": "2000", "data_type": "BF16" }