{ "checkpoint": "ckpt_142.pt", "model": "GPT2-L", "training_step": 6000, "instability_type": "Recoverable loss spikes", "learning_rate": "1e-2", "decay": "0.1", "warm": "2000", "data_type": "FP8_with_BF16_head", "reproducibility": false }