{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 500,
  "global_step": 42,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.023809523809523808,
      "grad_norm": 0.35546875,
      "learning_rate": 4e-05,
      "loss": 0.9948,
      "step": 1
    },
    {
      "epoch": 0.11904761904761904,
      "grad_norm": 0.31640625,
      "learning_rate": 0.0002,
      "loss": 1.1099,
      "step": 5
    },
    {
      "epoch": 0.23809523809523808,
      "grad_norm": 0.31640625,
      "learning_rate": 0.0001911228490388136,
      "loss": 1.048,
      "step": 10
    },
    {
      "epoch": 0.35714285714285715,
      "grad_norm": 0.267578125,
      "learning_rate": 0.00016606747233900815,
      "loss": 0.9912,
      "step": 15
    },
    {
      "epoch": 0.47619047619047616,
      "grad_norm": 0.2578125,
      "learning_rate": 0.00012928227712765504,
      "loss": 0.9396,
      "step": 20
    },
    {
      "epoch": 0.5952380952380952,
      "grad_norm": 0.2294921875,
      "learning_rate": 8.729821802531212e-05,
      "loss": 0.997,
      "step": 25
    },
    {
      "epoch": 0.7142857142857143,
      "grad_norm": 0.2421875,
      "learning_rate": 4.756927164427685e-05,
      "loss": 1.0419,
      "step": 30
    },
    {
      "epoch": 0.8333333333333334,
      "grad_norm": 0.248046875,
      "learning_rate": 1.7149035075615794e-05,
      "loss": 1.0231,
      "step": 35
    },
    {
      "epoch": 0.9523809523809523,
      "grad_norm": 0.208984375,
      "learning_rate": 1.4384089652291543e-06,
      "loss": 0.9758,
      "step": 40
    },
    {
      "epoch": 1.0,
      "eval_loss": 1.0181750059127808,
      "eval_runtime": 88.1603,
      "eval_samples_per_second": 3.755,
      "eval_steps_per_second": 0.476,
      "step": 42
    },
    {
      "epoch": 1.0,
      "step": 42,
      "total_flos": 2.935565656994611e+16,
      "train_loss": 1.0143198739914667,
      "train_runtime": 408.5529,
      "train_samples_per_second": 0.818,
      "train_steps_per_second": 0.103
    }
  ],
  "logging_steps": 5,
  "max_steps": 42,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 10,
  "total_flos": 2.935565656994611e+16,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}