trainer_state.json · maplekeng/Qwen2.5-0.5B-Instruct-Gensyn-Swarm-sly_nimble

Qwen2.5-0.5B-Instruct-Gensyn-Swarm-sly_nimble_lemur

File size: 4,521 Bytes

1fc431e
 
 
 
f73739d
1fc431e
7c4b4c7
1fc431e
 
 
 
 
066da06
f73739d
066da06
1fc431e
 
066da06
 
 
ed5b77b
 
1fc431e
066da06
 
1fc431e
95b58dd
066da06
1fc431e
 
 
066da06
f73739d
066da06
 
7c4b4c7
b6c521b
066da06
 
ed5b77b
07c5b60
1fc431e
066da06
 
1fc431e
95b58dd
c5aa377
1fc431e
 
 
066da06
3cdf5e8
066da06
 
7c4b4c7
7216e43
066da06
 
ed5b77b
7c4b4c7
 
066da06
 
7c4b4c7
 
066da06
7c4b4c7
 
 
066da06
f73739d
066da06
 
7c4b4c7
066da06
 
 
ed5b77b
 
1fc431e
066da06
 
1fc431e
95b58dd
066da06
7c4b4c7
 
 
066da06
f73739d
066da06
 
7c4b4c7
066da06
 
 
ed5b77b
 
7c4b4c7
066da06
 
7c4b4c7
 
c5aa377
7c4b4c7
 
 
f73739d
7c4b4c7
1fc431e
066da06
 
 
 
1fc431e
 
 
7c4b4c7
1fc431e
f73739d
8327b1f
1fc431e

{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.4,
  "eval_steps": 500,
  "global_step": 10,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "completion_length": 83.0,
      "epoch": 0.8,
      "grad_norm": 7.9807281494140625,
      "kl": 0.0,
      "learning_rate": 5e-07,
      "loss": -0.0,
      "reward": 1.7760924957692623,
      "reward_std": 0.022786720073781908,
      "rewards/concensus_correctness_reward_func": 0.0,
      "rewards/consensus_reward_func": 0.0,
      "rewards/cumulative_reward_2": 0.0,
      "rewards/final_correctness_reward_func": 1.5,
      "rewards/question_recreation_reward_func": 0.1510925330221653,
      "rewards/soft_format_reward_func": 0.0,
      "rewards/strict_format_reward_func": 0.0,
      "rewards/xmlcount_reward_func": 0.125,
      "step": 2
    },
    {
      "completion_length": 56.833333333333336,
      "epoch": 1.4,
      "grad_norm": 10.24837589263916,
      "kl": 0.0015244192351625923,
      "learning_rate": 4.415111107797445e-07,
      "loss": 0.0,
      "reward": 2.550933281580607,
      "reward_std": 0.07004699483513832,
      "rewards/concensus_correctness_reward_func": 0.0,
      "rewards/consensus_reward_func": 0.0,
      "rewards/cumulative_reward_2": 0.0,
      "rewards/final_correctness_reward_func": 2.0,
      "rewards/question_recreation_reward_func": 0.42593332131703693,
      "rewards/soft_format_reward_func": 0.0,
      "rewards/strict_format_reward_func": 0.0,
      "rewards/xmlcount_reward_func": 0.125,
      "step": 4
    },
    {
      "completion_length": 77.66666666666667,
      "epoch": 2.0,
      "grad_norm": 6.0542120933532715,
      "kl": 0.0016350069102675964,
      "learning_rate": 2.934120444167326e-07,
      "loss": 0.0,
      "reward": 1.5878510723511379,
      "reward_std": 0.0029388506275912127,
      "rewards/concensus_correctness_reward_func": 0.0,
      "rewards/consensus_reward_func": 0.0,
      "rewards/cumulative_reward_2": 0.0,
      "rewards/final_correctness_reward_func": 1.3333333333333333,
      "rewards/question_recreation_reward_func": 0.1295176992813746,
      "rewards/soft_format_reward_func": 0.0,
      "rewards/strict_format_reward_func": 0.0,
      "rewards/xmlcount_reward_func": 0.125,
      "step": 6
    },
    {
      "completion_length": 72.25,
      "epoch": 2.8,
      "grad_norm": 2.34584641456604,
      "kl": 0.0029444374376907945,
      "learning_rate": 1.2500000000000005e-07,
      "loss": -0.0,
      "reward": 1.8496044538915157,
      "reward_std": 0.0011594545212574303,
      "rewards/concensus_correctness_reward_func": 0.0,
      "rewards/consensus_reward_func": 0.0,
      "rewards/cumulative_reward_2": 0.0,
      "rewards/final_correctness_reward_func": 1.5,
      "rewards/question_recreation_reward_func": 0.22460449486970901,
      "rewards/soft_format_reward_func": 0.0,
      "rewards/strict_format_reward_func": 0.0,
      "rewards/xmlcount_reward_func": 0.125,
      "step": 8
    },
    {
      "completion_length": 75.83333333333333,
      "epoch": 3.4,
      "grad_norm": 5.32132625579834,
      "kl": 0.007413267778853576,
      "learning_rate": 1.507684480352292e-08,
      "loss": 0.0,
      "reward": 1.6325677633285522,
      "reward_std": 0.042186157782756105,
      "rewards/concensus_correctness_reward_func": 0.0,
      "rewards/consensus_reward_func": 0.0,
      "rewards/cumulative_reward_2": 0.0,
      "rewards/final_correctness_reward_func": 1.3333333333333333,
      "rewards/question_recreation_reward_func": 0.17423443992932638,
      "rewards/soft_format_reward_func": 0.0,
      "rewards/strict_format_reward_func": 0.0,
      "rewards/xmlcount_reward_func": 0.125,
      "step": 10
    },
    {
      "epoch": 3.4,
      "step": 10,
      "total_flos": 0.0,
      "train_loss": 6.0011893765477e-06,
      "train_runtime": 503.4756,
      "train_samples_per_second": 0.079,
      "train_steps_per_second": 0.02
    }
  ],
  "logging_steps": 2,
  "max_steps": 10,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 5,
  "save_steps": 10,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 0.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}