File size: 4,521 Bytes
1fc431e
 
 
 
f73739d
1fc431e
7c4b4c7
1fc431e
 
 
 
 
066da06
f73739d
066da06
1fc431e
 
066da06
 
 
ed5b77b
 
1fc431e
066da06
 
1fc431e
95b58dd
066da06
1fc431e
 
 
066da06
f73739d
066da06
 
7c4b4c7
b6c521b
066da06
 
ed5b77b
07c5b60
1fc431e
066da06
 
1fc431e
95b58dd
c5aa377
1fc431e
 
 
066da06
3cdf5e8
066da06
 
7c4b4c7
7216e43
066da06
 
ed5b77b
7c4b4c7
 
066da06
 
7c4b4c7
 
066da06
7c4b4c7
 
 
066da06
f73739d
066da06
 
7c4b4c7
066da06
 
 
ed5b77b
 
1fc431e
066da06
 
1fc431e
95b58dd
066da06
7c4b4c7
 
 
066da06
f73739d
066da06
 
7c4b4c7
066da06
 
 
ed5b77b
 
7c4b4c7
066da06
 
7c4b4c7
 
c5aa377
7c4b4c7
 
 
f73739d
7c4b4c7
1fc431e
066da06
 
 
 
1fc431e
 
 
7c4b4c7
1fc431e
f73739d
8327b1f
1fc431e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.4,
  "eval_steps": 500,
  "global_step": 10,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "completion_length": 83.0,
      "epoch": 0.8,
      "grad_norm": 7.9807281494140625,
      "kl": 0.0,
      "learning_rate": 5e-07,
      "loss": -0.0,
      "reward": 1.7760924957692623,
      "reward_std": 0.022786720073781908,
      "rewards/concensus_correctness_reward_func": 0.0,
      "rewards/consensus_reward_func": 0.0,
      "rewards/cumulative_reward_2": 0.0,
      "rewards/final_correctness_reward_func": 1.5,
      "rewards/question_recreation_reward_func": 0.1510925330221653,
      "rewards/soft_format_reward_func": 0.0,
      "rewards/strict_format_reward_func": 0.0,
      "rewards/xmlcount_reward_func": 0.125,
      "step": 2
    },
    {
      "completion_length": 56.833333333333336,
      "epoch": 1.4,
      "grad_norm": 10.24837589263916,
      "kl": 0.0015244192351625923,
      "learning_rate": 4.415111107797445e-07,
      "loss": 0.0,
      "reward": 2.550933281580607,
      "reward_std": 0.07004699483513832,
      "rewards/concensus_correctness_reward_func": 0.0,
      "rewards/consensus_reward_func": 0.0,
      "rewards/cumulative_reward_2": 0.0,
      "rewards/final_correctness_reward_func": 2.0,
      "rewards/question_recreation_reward_func": 0.42593332131703693,
      "rewards/soft_format_reward_func": 0.0,
      "rewards/strict_format_reward_func": 0.0,
      "rewards/xmlcount_reward_func": 0.125,
      "step": 4
    },
    {
      "completion_length": 77.66666666666667,
      "epoch": 2.0,
      "grad_norm": 6.0542120933532715,
      "kl": 0.0016350069102675964,
      "learning_rate": 2.934120444167326e-07,
      "loss": 0.0,
      "reward": 1.5878510723511379,
      "reward_std": 0.0029388506275912127,
      "rewards/concensus_correctness_reward_func": 0.0,
      "rewards/consensus_reward_func": 0.0,
      "rewards/cumulative_reward_2": 0.0,
      "rewards/final_correctness_reward_func": 1.3333333333333333,
      "rewards/question_recreation_reward_func": 0.1295176992813746,
      "rewards/soft_format_reward_func": 0.0,
      "rewards/strict_format_reward_func": 0.0,
      "rewards/xmlcount_reward_func": 0.125,
      "step": 6
    },
    {
      "completion_length": 72.25,
      "epoch": 2.8,
      "grad_norm": 2.34584641456604,
      "kl": 0.0029444374376907945,
      "learning_rate": 1.2500000000000005e-07,
      "loss": -0.0,
      "reward": 1.8496044538915157,
      "reward_std": 0.0011594545212574303,
      "rewards/concensus_correctness_reward_func": 0.0,
      "rewards/consensus_reward_func": 0.0,
      "rewards/cumulative_reward_2": 0.0,
      "rewards/final_correctness_reward_func": 1.5,
      "rewards/question_recreation_reward_func": 0.22460449486970901,
      "rewards/soft_format_reward_func": 0.0,
      "rewards/strict_format_reward_func": 0.0,
      "rewards/xmlcount_reward_func": 0.125,
      "step": 8
    },
    {
      "completion_length": 75.83333333333333,
      "epoch": 3.4,
      "grad_norm": 5.32132625579834,
      "kl": 0.007413267778853576,
      "learning_rate": 1.507684480352292e-08,
      "loss": 0.0,
      "reward": 1.6325677633285522,
      "reward_std": 0.042186157782756105,
      "rewards/concensus_correctness_reward_func": 0.0,
      "rewards/consensus_reward_func": 0.0,
      "rewards/cumulative_reward_2": 0.0,
      "rewards/final_correctness_reward_func": 1.3333333333333333,
      "rewards/question_recreation_reward_func": 0.17423443992932638,
      "rewards/soft_format_reward_func": 0.0,
      "rewards/strict_format_reward_func": 0.0,
      "rewards/xmlcount_reward_func": 0.125,
      "step": 10
    },
    {
      "epoch": 3.4,
      "step": 10,
      "total_flos": 0.0,
      "train_loss": 6.0011893765477e-06,
      "train_runtime": 503.4756,
      "train_samples_per_second": 0.079,
      "train_steps_per_second": 0.02
    }
  ],
  "logging_steps": 2,
  "max_steps": 10,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 5,
  "save_steps": 10,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 0.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}