|
{ |
|
"best_global_step": null, |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.1, |
|
"eval_steps": 500, |
|
"global_step": 100, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"completion_length": 256.0, |
|
"epoch": 0.01, |
|
"grad_norm": 0.40769532322883606, |
|
"kl": 1.5046377666294574e-05, |
|
"learning_rate": 4.5000000000000003e-07, |
|
"loss": 0.0, |
|
"reward": -2.465625, |
|
"reward_std": 0.42179486304521563, |
|
"rewards/reward_check_answer": 0.0, |
|
"rewards/reward_check_numbers": 0.0, |
|
"rewards/reward_consciousness": 0.284375, |
|
"rewards/reward_format_approximately": -2.75, |
|
"rewards/reward_format_exactly": 0.0, |
|
"rewards/reward_inner_working": 0.0, |
|
"step": 10 |
|
}, |
|
{ |
|
"completion_length": 256.0, |
|
"epoch": 0.02, |
|
"grad_norm": 0.4385190010070801, |
|
"kl": 1.570033491589129e-05, |
|
"learning_rate": 9.500000000000001e-07, |
|
"loss": 0.0, |
|
"reward": -2.31875, |
|
"reward_std": 0.5976869776844979, |
|
"rewards/reward_check_answer": 0.0, |
|
"rewards/reward_check_numbers": 0.0, |
|
"rewards/reward_consciousness": 0.30625, |
|
"rewards/reward_format_approximately": -2.625, |
|
"rewards/reward_format_exactly": 0.0, |
|
"rewards/reward_inner_working": 0.0, |
|
"step": 20 |
|
}, |
|
{ |
|
"completion_length": 256.0, |
|
"epoch": 0.03, |
|
"grad_norm": 0.4469616115093231, |
|
"kl": 1.6215385403484107e-05, |
|
"learning_rate": 1.45e-06, |
|
"loss": 0.0, |
|
"reward": -2.459375, |
|
"reward_std": 0.3866558074951172, |
|
"rewards/reward_check_answer": 0.0, |
|
"rewards/reward_check_numbers": 0.0, |
|
"rewards/reward_consciousness": 0.315625, |
|
"rewards/reward_format_approximately": -2.775, |
|
"rewards/reward_format_exactly": 0.0, |
|
"rewards/reward_inner_working": 0.0, |
|
"step": 30 |
|
}, |
|
{ |
|
"completion_length": 256.0, |
|
"epoch": 0.04, |
|
"grad_norm": 0.4186990559101105, |
|
"kl": 1.6400672029703857e-05, |
|
"learning_rate": 1.9500000000000004e-06, |
|
"loss": 0.0, |
|
"reward": -2.221875, |
|
"reward_std": 0.7143462687730789, |
|
"rewards/reward_check_answer": 0.0, |
|
"rewards/reward_check_numbers": 0.0, |
|
"rewards/reward_consciousness": 0.278125, |
|
"rewards/reward_format_approximately": -2.5, |
|
"rewards/reward_format_exactly": 0.0, |
|
"rewards/reward_inner_working": 0.0, |
|
"step": 40 |
|
}, |
|
{ |
|
"completion_length": 256.0, |
|
"epoch": 0.05, |
|
"grad_norm": 0.44865837693214417, |
|
"kl": 1.9841926405206323e-05, |
|
"learning_rate": 2.4500000000000003e-06, |
|
"loss": 0.0, |
|
"reward": -2.39375, |
|
"reward_std": 0.5951654136180877, |
|
"rewards/reward_check_answer": 0.0, |
|
"rewards/reward_check_numbers": 0.0, |
|
"rewards/reward_consciousness": 0.25625, |
|
"rewards/reward_format_approximately": -2.65, |
|
"rewards/reward_format_exactly": 0.0, |
|
"rewards/reward_inner_working": 0.0, |
|
"step": 50 |
|
}, |
|
{ |
|
"completion_length": 255.3, |
|
"epoch": 0.06, |
|
"grad_norm": 0.4461980164051056, |
|
"kl": 2.5847879624052438e-05, |
|
"learning_rate": 2.95e-06, |
|
"loss": 0.0, |
|
"reward": -2.240625, |
|
"reward_std": 0.7081772074103355, |
|
"rewards/reward_check_answer": 0.0, |
|
"rewards/reward_check_numbers": 0.0, |
|
"rewards/reward_consciousness": 0.309375, |
|
"rewards/reward_format_approximately": -2.55, |
|
"rewards/reward_format_exactly": 0.0, |
|
"rewards/reward_inner_working": 0.0, |
|
"step": 60 |
|
}, |
|
{ |
|
"completion_length": 256.0, |
|
"epoch": 0.07, |
|
"grad_norm": 0.3938167691230774, |
|
"kl": 3.849279601126909e-05, |
|
"learning_rate": 3.45e-06, |
|
"loss": 0.0, |
|
"reward": -2.384375, |
|
"reward_std": 0.3940433248877525, |
|
"rewards/reward_check_answer": 0.0, |
|
"rewards/reward_check_numbers": 0.0, |
|
"rewards/reward_consciousness": 0.340625, |
|
"rewards/reward_format_approximately": -2.725, |
|
"rewards/reward_format_exactly": 0.0, |
|
"rewards/reward_inner_working": 0.0, |
|
"step": 70 |
|
}, |
|
{ |
|
"completion_length": 256.0, |
|
"epoch": 0.08, |
|
"grad_norm": 0.44823428988456726, |
|
"kl": 6.826544413343071e-05, |
|
"learning_rate": 3.95e-06, |
|
"loss": 0.0, |
|
"reward": -2.296875, |
|
"reward_std": 0.5537523284554482, |
|
"rewards/reward_check_answer": 0.0, |
|
"rewards/reward_check_numbers": 0.0, |
|
"rewards/reward_consciousness": 0.353125, |
|
"rewards/reward_format_approximately": -2.65, |
|
"rewards/reward_format_exactly": 0.0, |
|
"rewards/reward_inner_working": 0.0, |
|
"step": 80 |
|
}, |
|
{ |
|
"completion_length": 256.0, |
|
"epoch": 0.09, |
|
"grad_norm": 0.4049831032752991, |
|
"kl": 9.327681036666035e-05, |
|
"learning_rate": 4.450000000000001e-06, |
|
"loss": 0.0, |
|
"reward": -2.471875, |
|
"reward_std": 0.33873592466115954, |
|
"rewards/reward_check_answer": 0.0, |
|
"rewards/reward_check_numbers": 0.0, |
|
"rewards/reward_consciousness": 0.328125, |
|
"rewards/reward_format_approximately": -2.8, |
|
"rewards/reward_format_exactly": 0.0, |
|
"rewards/reward_inner_working": 0.0, |
|
"step": 90 |
|
}, |
|
{ |
|
"completion_length": 256.0, |
|
"epoch": 0.1, |
|
"grad_norm": 0.43748927116394043, |
|
"kl": 0.00017035281634889544, |
|
"learning_rate": 4.95e-06, |
|
"loss": 0.0, |
|
"reward": -2.26875, |
|
"reward_std": 0.5575396627187729, |
|
"rewards/reward_check_answer": 0.0, |
|
"rewards/reward_check_numbers": 0.0, |
|
"rewards/reward_consciousness": 0.28125, |
|
"rewards/reward_format_approximately": -2.55, |
|
"rewards/reward_format_exactly": 0.0, |
|
"rewards/reward_inner_working": 0.0, |
|
"step": 100 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 1000, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|