{
"base_model_name": "agentica-org_DeepScaleR-1.5B-Preview",
"base_model_class": "Qwen2ForCausalLM",
"base_loaded_in_4bit": true,
"base_loaded_in_8bit": false,
"projections": "q, v",
"loss": 0.821,
"grad_norm": 0.25577130913734436,
"learning_rate": 3.896103896103896e-06,
"epoch": 0.9908256880733946,
"current_steps": 2591,
"train_runtime": 17564.2345,
"train_samples_per_second": 0.596,
"train_steps_per_second": 0.005,
"total_flos": 2.4653437656367104e+16,
"train_loss": 1.2532436619570226
}