rbelanec commited on
Commit
40e4694
verified
1 Parent(s): ac3d5a9

Training in progress, step 39800

Browse files
Files changed (2) hide show
  1. adapter_model.safetensors +1 -1
  2. trainer_log.jsonl +41 -0
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8d798ba5f55896a82baf173cbb9b35763d2be4ca68fe72e9ed7bcd4170e7a9e1
3
  size 18124968
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9d6b86ed6b6f45e54a059bf835433228902439e250a95b7cf859495eb1f9d9c6
3
  size 18124968
trainer_log.jsonl CHANGED
@@ -8116,3 +8116,44 @@
8116
  {"current_steps": 39595, "total_steps": 40000, "loss": 0.0, "lr": 1.2708814586862016e-08, "epoch": 280.8199643493761, "percentage": 98.99, "elapsed_time": "9:24:23", "remaining_time": "0:05:46", "throughput": 2985.11, "total_tokens": 101084920}
8117
  {"current_steps": 39600, "total_steps": 40000, "loss": 0.0, "lr": 1.2397742806111168e-08, "epoch": 280.85561497326205, "percentage": 99.0, "elapsed_time": "9:24:27", "remaining_time": "0:05:42", "throughput": 2985.08, "total_tokens": 101096120}
8118
  {"current_steps": 39600, "total_steps": 40000, "eval_loss": 0.6512401700019836, "epoch": 280.85561497326205, "percentage": 99.0, "elapsed_time": "9:24:31", "remaining_time": "0:05:42", "throughput": 2984.73, "total_tokens": 101096120}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8116
  {"current_steps": 39595, "total_steps": 40000, "loss": 0.0, "lr": 1.2708814586862016e-08, "epoch": 280.8199643493761, "percentage": 98.99, "elapsed_time": "9:24:23", "remaining_time": "0:05:46", "throughput": 2985.11, "total_tokens": 101084920}
8117
  {"current_steps": 39600, "total_steps": 40000, "loss": 0.0, "lr": 1.2397742806111168e-08, "epoch": 280.85561497326205, "percentage": 99.0, "elapsed_time": "9:24:27", "remaining_time": "0:05:42", "throughput": 2985.08, "total_tokens": 101096120}
8118
  {"current_steps": 39600, "total_steps": 40000, "eval_loss": 0.6512401700019836, "epoch": 280.85561497326205, "percentage": 99.0, "elapsed_time": "9:24:31", "remaining_time": "0:05:42", "throughput": 2984.73, "total_tokens": 101096120}
8119
+ {"current_steps": 39605, "total_steps": 40000, "loss": 0.0, "lr": 1.209052442764369e-08, "epoch": 280.89126559714794, "percentage": 99.01, "elapsed_time": "9:24:37", "remaining_time": "0:05:37", "throughput": 2984.56, "total_tokens": 101110520}
8120
+ {"current_steps": 39610, "total_steps": 40000, "loss": 0.0, "lr": 1.17871594988328e-08, "epoch": 280.9269162210339, "percentage": 99.02, "elapsed_time": "9:24:41", "remaining_time": "0:05:33", "throughput": 2984.59, "total_tokens": 101123768}
8121
+ {"current_steps": 39615, "total_steps": 40000, "loss": 0.0, "lr": 1.1487648066466072e-08, "epoch": 280.96256684491976, "percentage": 99.04, "elapsed_time": "9:24:46", "remaining_time": "0:05:29", "throughput": 2984.58, "total_tokens": 101135672}
8122
+ {"current_steps": 39620, "total_steps": 40000, "loss": 0.0, "lr": 1.1191990176728784e-08, "epoch": 280.9982174688057, "percentage": 99.05, "elapsed_time": "9:24:50", "remaining_time": "0:05:25", "throughput": 2984.58, "total_tokens": 101147992}
8123
+ {"current_steps": 39625, "total_steps": 40000, "loss": 0.0, "lr": 1.0900185875215018e-08, "epoch": 281.0285204991087, "percentage": 99.06, "elapsed_time": "9:24:54", "remaining_time": "0:05:20", "throughput": 2984.49, "total_tokens": 101156960}
8124
+ {"current_steps": 39630, "total_steps": 40000, "loss": 0.0, "lr": 1.0612235206924891e-08, "epoch": 281.06417112299465, "percentage": 99.08, "elapsed_time": "9:24:58", "remaining_time": "0:05:16", "throughput": 2984.52, "total_tokens": 101170016}
8125
+ {"current_steps": 39635, "total_steps": 40000, "loss": 0.0, "lr": 1.0328138216264549e-08, "epoch": 281.0998217468806, "percentage": 99.09, "elapsed_time": "9:25:02", "remaining_time": "0:05:12", "throughput": 2984.56, "total_tokens": 101183584}
8126
+ {"current_steps": 39640, "total_steps": 40000, "loss": 0.0, "lr": 1.004789494704339e-08, "epoch": 281.1354723707665, "percentage": 99.1, "elapsed_time": "9:25:06", "remaining_time": "0:05:07", "throughput": 2984.58, "total_tokens": 101196672}
8127
+ {"current_steps": 39645, "total_steps": 40000, "loss": 0.0, "lr": 9.771505442482397e-09, "epoch": 281.1711229946524, "percentage": 99.11, "elapsed_time": "9:25:10", "remaining_time": "0:05:03", "throughput": 2984.59, "total_tokens": 101209344}
8128
+ {"current_steps": 39650, "total_steps": 40000, "loss": 0.0, "lr": 9.498969745200259e-09, "epoch": 281.2067736185383, "percentage": 99.12, "elapsed_time": "9:25:14", "remaining_time": "0:04:59", "throughput": 2984.61, "total_tokens": 101222240}
8129
+ {"current_steps": 39655, "total_steps": 40000, "loss": 0.0, "lr": 9.230287897230017e-09, "epoch": 281.24242424242425, "percentage": 99.14, "elapsed_time": "9:25:18", "remaining_time": "0:04:55", "throughput": 2984.64, "total_tokens": 101235648}
8130
+ {"current_steps": 39660, "total_steps": 40000, "loss": 0.0, "lr": 8.965459940002419e-09, "epoch": 281.27807486631013, "percentage": 99.15, "elapsed_time": "9:25:22", "remaining_time": "0:04:50", "throughput": 2984.62, "total_tokens": 101247232}
8131
+ {"current_steps": 39665, "total_steps": 40000, "loss": 0.0, "lr": 8.704485914357019e-09, "epoch": 281.3137254901961, "percentage": 99.16, "elapsed_time": "9:25:27", "remaining_time": "0:04:46", "throughput": 2984.65, "total_tokens": 101260512}
8132
+ {"current_steps": 39670, "total_steps": 40000, "loss": 0.0, "lr": 8.447365860539402e-09, "epoch": 281.349376114082, "percentage": 99.17, "elapsed_time": "9:25:31", "remaining_time": "0:04:42", "throughput": 2984.69, "total_tokens": 101274016}
8133
+ {"current_steps": 39675, "total_steps": 40000, "loss": 0.0, "lr": 8.194099818201184e-09, "epoch": 281.3850267379679, "percentage": 99.19, "elapsed_time": "9:25:35", "remaining_time": "0:04:37", "throughput": 2984.68, "total_tokens": 101285952}
8134
+ {"current_steps": 39680, "total_steps": 40000, "loss": 0.0, "lr": 7.944687826400011e-09, "epoch": 281.42067736185385, "percentage": 99.2, "elapsed_time": "9:25:39", "remaining_time": "0:04:33", "throughput": 2984.72, "total_tokens": 101299488}
8135
+ {"current_steps": 39685, "total_steps": 40000, "loss": 0.0, "lr": 7.699129923599557e-09, "epoch": 281.45632798573973, "percentage": 99.21, "elapsed_time": "9:25:43", "remaining_time": "0:04:29", "throughput": 2984.77, "total_tokens": 101313312}
8136
+ {"current_steps": 39690, "total_steps": 40000, "loss": 0.0, "lr": 7.457426147663982e-09, "epoch": 281.4919786096257, "percentage": 99.22, "elapsed_time": "9:25:47", "remaining_time": "0:04:25", "throughput": 2984.77, "total_tokens": 101325920}
8137
+ {"current_steps": 39695, "total_steps": 40000, "loss": 0.0, "lr": 7.219576535871797e-09, "epoch": 281.52762923351156, "percentage": 99.24, "elapsed_time": "9:25:51", "remaining_time": "0:04:20", "throughput": 2984.79, "total_tokens": 101338752}
8138
+ {"current_steps": 39700, "total_steps": 40000, "loss": 0.0, "lr": 6.985581124896445e-09, "epoch": 281.5632798573975, "percentage": 99.25, "elapsed_time": "9:25:55", "remaining_time": "0:04:16", "throughput": 2984.81, "total_tokens": 101351520}
8139
+ {"current_steps": 39705, "total_steps": 40000, "loss": 0.0, "lr": 6.755439950828501e-09, "epoch": 281.59893048128345, "percentage": 99.26, "elapsed_time": "9:25:59", "remaining_time": "0:04:12", "throughput": 2984.83, "total_tokens": 101364576}
8140
+ {"current_steps": 39710, "total_steps": 40000, "loss": 0.0, "lr": 6.5291530491562444e-09, "epoch": 281.63458110516933, "percentage": 99.28, "elapsed_time": "9:26:04", "remaining_time": "0:04:08", "throughput": 2984.88, "total_tokens": 101378720}
8141
+ {"current_steps": 39715, "total_steps": 40000, "loss": 0.0, "lr": 6.3067204547739845e-09, "epoch": 281.6702317290553, "percentage": 99.29, "elapsed_time": "9:26:08", "remaining_time": "0:04:03", "throughput": 2984.86, "total_tokens": 101390144}
8142
+ {"current_steps": 39720, "total_steps": 40000, "loss": 0.0, "lr": 6.088142201987612e-09, "epoch": 281.70588235294116, "percentage": 99.3, "elapsed_time": "9:26:12", "remaining_time": "0:03:59", "throughput": 2984.85, "total_tokens": 101402144}
8143
+ {"current_steps": 39725, "total_steps": 40000, "loss": 0.0, "lr": 5.873418324503499e-09, "epoch": 281.7415329768271, "percentage": 99.31, "elapsed_time": "9:26:16", "remaining_time": "0:03:55", "throughput": 2984.86, "total_tokens": 101414816}
8144
+ {"current_steps": 39730, "total_steps": 40000, "loss": 0.0, "lr": 5.6625488554340465e-09, "epoch": 281.777183600713, "percentage": 99.33, "elapsed_time": "9:26:20", "remaining_time": "0:03:50", "throughput": 2984.92, "total_tokens": 101428992}
8145
+ {"current_steps": 39735, "total_steps": 40000, "loss": 0.0, "lr": 5.455533827297688e-09, "epoch": 281.81283422459893, "percentage": 99.34, "elapsed_time": "9:26:24", "remaining_time": "0:03:46", "throughput": 2984.92, "total_tokens": 101441120}
8146
+ {"current_steps": 39740, "total_steps": 40000, "loss": 0.0, "lr": 5.252373272018885e-09, "epoch": 281.8484848484849, "percentage": 99.35, "elapsed_time": "9:26:28", "remaining_time": "0:03:42", "throughput": 2984.92, "total_tokens": 101453408}
8147
+ {"current_steps": 39745, "total_steps": 40000, "loss": 0.0, "lr": 5.053067220925356e-09, "epoch": 281.88413547237076, "percentage": 99.36, "elapsed_time": "9:26:32", "remaining_time": "0:03:38", "throughput": 2984.94, "total_tokens": 101466560}
8148
+ {"current_steps": 39750, "total_steps": 40000, "loss": 0.0, "lr": 4.857615704759177e-09, "epoch": 281.9197860962567, "percentage": 99.38, "elapsed_time": "9:26:36", "remaining_time": "0:03:33", "throughput": 2985.0, "total_tokens": 101480768}
8149
+ {"current_steps": 39755, "total_steps": 40000, "loss": 0.0, "lr": 4.666018753654577e-09, "epoch": 281.9554367201426, "percentage": 99.39, "elapsed_time": "9:26:41", "remaining_time": "0:03:29", "throughput": 2985.05, "total_tokens": 101494816}
8150
+ {"current_steps": 39760, "total_steps": 40000, "loss": 0.0, "lr": 4.478276397162917e-09, "epoch": 281.99108734402853, "percentage": 99.4, "elapsed_time": "9:26:45", "remaining_time": "0:03:25", "throughput": 2985.08, "total_tokens": 101507936}
8151
+ {"current_steps": 39765, "total_steps": 40000, "loss": 0.0, "lr": 4.294388664233262e-09, "epoch": 282.02139037433153, "percentage": 99.41, "elapsed_time": "9:26:49", "remaining_time": "0:03:20", "throughput": 2985.06, "total_tokens": 101519088}
8152
+ {"current_steps": 39770, "total_steps": 40000, "loss": 0.0, "lr": 4.114355583223484e-09, "epoch": 282.0570409982175, "percentage": 99.42, "elapsed_time": "9:26:53", "remaining_time": "0:03:16", "throughput": 2985.05, "total_tokens": 101531216}
8153
+ {"current_steps": 39775, "total_steps": 40000, "loss": 0.0, "lr": 3.9381771818974845e-09, "epoch": 282.09269162210336, "percentage": 99.44, "elapsed_time": "9:26:57", "remaining_time": "0:03:12", "throughput": 2985.09, "total_tokens": 101544592}
8154
+ {"current_steps": 39780, "total_steps": 40000, "loss": 0.0, "lr": 3.765853487427973e-09, "epoch": 282.1283422459893, "percentage": 99.45, "elapsed_time": "9:27:01", "remaining_time": "0:03:08", "throughput": 2985.09, "total_tokens": 101556912}
8155
+ {"current_steps": 39785, "total_steps": 40000, "loss": 0.0, "lr": 3.5973845263825857e-09, "epoch": 282.16399286987524, "percentage": 99.46, "elapsed_time": "9:27:05", "remaining_time": "0:03:03", "throughput": 2985.14, "total_tokens": 101570960}
8156
+ {"current_steps": 39790, "total_steps": 40000, "loss": 0.0, "lr": 3.4327703247488684e-09, "epoch": 282.19964349376113, "percentage": 99.48, "elapsed_time": "9:27:09", "remaining_time": "0:02:59", "throughput": 2985.17, "total_tokens": 101584304}
8157
+ {"current_steps": 39795, "total_steps": 40000, "loss": 0.0, "lr": 3.2720109079037443e-09, "epoch": 282.2352941176471, "percentage": 99.49, "elapsed_time": "9:27:13", "remaining_time": "0:02:55", "throughput": 2985.19, "total_tokens": 101597328}
8158
+ {"current_steps": 39800, "total_steps": 40000, "loss": 0.0, "lr": 3.1151063006468193e-09, "epoch": 282.27094474153296, "percentage": 99.5, "elapsed_time": "9:27:17", "remaining_time": "0:02:51", "throughput": 2985.2, "total_tokens": 101609904}
8159
+ {"current_steps": 39800, "total_steps": 40000, "eval_loss": 0.642926037311554, "epoch": 282.27094474153296, "percentage": 99.5, "elapsed_time": "9:27:21", "remaining_time": "0:02:51", "throughput": 2984.85, "total_tokens": 101609904}