rbelanec commited on
Commit
6445bf9
verified
1 Parent(s): 799696b

Training in progress, step 39600

Browse files
Files changed (2) hide show
  1. adapter_model.safetensors +1 -1
  2. trainer_log.jsonl +40 -0
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7a7f98e700d2b750fa787b317f9675f6e990ccfe3ee4b10b540b840119aa60c2
3
  size 460928
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0a2af390a171906445dfd051c096228d6406eda6c13aab63e5e5413af697b3a2
3
  size 460928
trainer_log.jsonl CHANGED
@@ -8076,3 +8076,43 @@
8076
  {"current_steps": 39400, "total_steps": 40000, "loss": 0.0011, "lr": 0.00016707417762611975, "epoch": 121.60587326120556, "percentage": 98.5, "elapsed_time": "4:48:18", "remaining_time": "0:04:23", "throughput": 3478.49, "total_tokens": 60173616}
8077
  {"current_steps": 39400, "total_steps": 40000, "eval_loss": 1.4503459930419922, "epoch": 121.60587326120556, "percentage": 98.5, "elapsed_time": "4:48:25", "remaining_time": "0:04:23", "throughput": 3477.07, "total_tokens": 60173616}
8078
  {"current_steps": 39405, "total_steps": 40000, "loss": 0.0007, "lr": 0.00016430631053459543, "epoch": 121.62132921174653, "percentage": 98.51, "elapsed_time": "4:48:29", "remaining_time": "0:04:21", "throughput": 3476.79, "total_tokens": 60181200}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8076
  {"current_steps": 39400, "total_steps": 40000, "loss": 0.0011, "lr": 0.00016707417762611975, "epoch": 121.60587326120556, "percentage": 98.5, "elapsed_time": "4:48:18", "remaining_time": "0:04:23", "throughput": 3478.49, "total_tokens": 60173616}
8077
  {"current_steps": 39400, "total_steps": 40000, "eval_loss": 1.4503459930419922, "epoch": 121.60587326120556, "percentage": 98.5, "elapsed_time": "4:48:25", "remaining_time": "0:04:23", "throughput": 3477.07, "total_tokens": 60173616}
8078
  {"current_steps": 39405, "total_steps": 40000, "loss": 0.0007, "lr": 0.00016430631053459543, "epoch": 121.62132921174653, "percentage": 98.51, "elapsed_time": "4:48:29", "remaining_time": "0:04:21", "throughput": 3476.79, "total_tokens": 60181200}
8079
+ {"current_steps": 39410, "total_steps": 40000, "loss": 0.0006, "lr": 0.0001615615499899803, "epoch": 121.63678516228748, "percentage": 98.52, "elapsed_time": "4:48:31", "remaining_time": "0:04:19", "throughput": 3476.83, "total_tokens": 60188784}
8080
+ {"current_steps": 39415, "total_steps": 40000, "loss": 0.0006, "lr": 0.00015883989641556905, "epoch": 121.65224111282843, "percentage": 98.54, "elapsed_time": "4:48:33", "remaining_time": "0:04:16", "throughput": 3476.85, "total_tokens": 60196048}
8081
+ {"current_steps": 39420, "total_steps": 40000, "loss": 0.0005, "lr": 0.00015614135023105934, "epoch": 121.6676970633694, "percentage": 98.55, "elapsed_time": "4:48:35", "remaining_time": "0:04:14", "throughput": 3476.89, "total_tokens": 60203504}
8082
+ {"current_steps": 39425, "total_steps": 40000, "loss": 0.0006, "lr": 0.00015346591185261827, "epoch": 121.68315301391036, "percentage": 98.56, "elapsed_time": "4:48:37", "remaining_time": "0:04:12", "throughput": 3476.92, "total_tokens": 60210928}
8083
+ {"current_steps": 39430, "total_steps": 40000, "loss": 0.0006, "lr": 0.00015081358169281576, "epoch": 121.69860896445131, "percentage": 98.58, "elapsed_time": "4:48:39", "remaining_time": "0:04:10", "throughput": 3476.96, "total_tokens": 60218448}
8084
+ {"current_steps": 39435, "total_steps": 40000, "loss": 0.0007, "lr": 0.00014818436016069135, "epoch": 121.71406491499228, "percentage": 98.59, "elapsed_time": "4:48:41", "remaining_time": "0:04:08", "throughput": 3477.02, "total_tokens": 60226416}
8085
+ {"current_steps": 39440, "total_steps": 40000, "loss": 0.0006, "lr": 0.00014557824766168735, "epoch": 121.72952086553323, "percentage": 98.6, "elapsed_time": "4:48:43", "remaining_time": "0:04:05", "throughput": 3477.07, "total_tokens": 60234032}
8086
+ {"current_steps": 39445, "total_steps": 40000, "loss": 0.0016, "lr": 0.00014299524459769896, "epoch": 121.74497681607419, "percentage": 98.61, "elapsed_time": "4:48:45", "remaining_time": "0:04:03", "throughput": 3477.13, "total_tokens": 60242000}
8087
+ {"current_steps": 39450, "total_steps": 40000, "loss": 0.0009, "lr": 0.0001404353513670742, "epoch": 121.76043276661515, "percentage": 98.62, "elapsed_time": "4:48:47", "remaining_time": "0:04:01", "throughput": 3477.17, "total_tokens": 60249616}
8088
+ {"current_steps": 39455, "total_steps": 40000, "loss": 0.0009, "lr": 0.0001378985683645806, "epoch": 121.7758887171561, "percentage": 98.64, "elapsed_time": "4:48:49", "remaining_time": "0:03:59", "throughput": 3477.26, "total_tokens": 60257904}
8089
+ {"current_steps": 39460, "total_steps": 40000, "loss": 0.0008, "lr": 0.0001353848959813886, "epoch": 121.79134466769706, "percentage": 98.65, "elapsed_time": "4:48:51", "remaining_time": "0:03:57", "throughput": 3477.27, "total_tokens": 60265008}
8090
+ {"current_steps": 39465, "total_steps": 40000, "loss": 0.0012, "lr": 0.00013289433460517142, "epoch": 121.80680061823801, "percentage": 98.66, "elapsed_time": "4:48:53", "remaining_time": "0:03:54", "throughput": 3477.33, "total_tokens": 60272976}
8091
+ {"current_steps": 39470, "total_steps": 40000, "loss": 0.0007, "lr": 0.00013042688462000518, "epoch": 121.82225656877898, "percentage": 98.67, "elapsed_time": "4:48:55", "remaining_time": "0:03:52", "throughput": 3477.36, "total_tokens": 60280208}
8092
+ {"current_steps": 39475, "total_steps": 40000, "loss": 0.0006, "lr": 0.0001279825464063855, "epoch": 121.83771251931994, "percentage": 98.69, "elapsed_time": "4:48:57", "remaining_time": "0:03:50", "throughput": 3477.41, "total_tokens": 60287888}
8093
+ {"current_steps": 39480, "total_steps": 40000, "loss": 0.0008, "lr": 0.00012556132034126087, "epoch": 121.85316846986089, "percentage": 98.7, "elapsed_time": "4:48:58", "remaining_time": "0:03:48", "throughput": 3477.45, "total_tokens": 60295568}
8094
+ {"current_steps": 39485, "total_steps": 40000, "loss": 0.0007, "lr": 0.0001231632067980326, "epoch": 121.86862442040186, "percentage": 98.71, "elapsed_time": "4:49:00", "remaining_time": "0:03:46", "throughput": 3477.52, "total_tokens": 60303664}
8095
+ {"current_steps": 39490, "total_steps": 40000, "loss": 0.0006, "lr": 0.00012078820614650486, "epoch": 121.88408037094281, "percentage": 98.72, "elapsed_time": "4:49:02", "remaining_time": "0:03:43", "throughput": 3477.56, "total_tokens": 60311184}
8096
+ {"current_steps": 39495, "total_steps": 40000, "loss": 0.0009, "lr": 0.00011843631875291804, "epoch": 121.89953632148377, "percentage": 98.74, "elapsed_time": "4:49:04", "remaining_time": "0:03:41", "throughput": 3477.61, "total_tokens": 60318928}
8097
+ {"current_steps": 39500, "total_steps": 40000, "loss": 0.0008, "lr": 0.00011610754497999863, "epoch": 121.91499227202473, "percentage": 98.75, "elapsed_time": "4:49:06", "remaining_time": "0:03:39", "throughput": 3477.66, "total_tokens": 60326608}
8098
+ {"current_steps": 39505, "total_steps": 40000, "loss": 0.001, "lr": 0.0001138018851868594, "epoch": 121.93044822256569, "percentage": 98.76, "elapsed_time": "4:49:08", "remaining_time": "0:03:37", "throughput": 3477.7, "total_tokens": 60334064}
8099
+ {"current_steps": 39510, "total_steps": 40000, "loss": 0.0012, "lr": 0.0001115193397290326, "epoch": 121.94590417310664, "percentage": 98.78, "elapsed_time": "4:49:10", "remaining_time": "0:03:35", "throughput": 3477.75, "total_tokens": 60341872}
8100
+ {"current_steps": 39515, "total_steps": 40000, "loss": 0.0007, "lr": 0.00010925990895856996, "epoch": 121.96136012364761, "percentage": 98.79, "elapsed_time": "4:49:12", "remaining_time": "0:03:32", "throughput": 3477.77, "total_tokens": 60349104}
8101
+ {"current_steps": 39520, "total_steps": 40000, "loss": 0.0008, "lr": 0.00010702359322385946, "epoch": 121.97681607418856, "percentage": 98.8, "elapsed_time": "4:49:14", "remaining_time": "0:03:30", "throughput": 3477.82, "total_tokens": 60356848}
8102
+ {"current_steps": 39525, "total_steps": 40000, "loss": 0.0009, "lr": 0.00010481039286977523, "epoch": 121.99227202472952, "percentage": 98.81, "elapsed_time": "4:49:16", "remaining_time": "0:03:28", "throughput": 3477.85, "total_tokens": 60364208}
8103
+ {"current_steps": 39530, "total_steps": 40000, "loss": 0.0008, "lr": 0.00010262030823764423, "epoch": 122.00618238021639, "percentage": 98.83, "elapsed_time": "4:49:18", "remaining_time": "0:03:26", "throughput": 3477.78, "total_tokens": 60370640}
8104
+ {"current_steps": 39535, "total_steps": 40000, "loss": 0.0006, "lr": 0.00010045333966517966, "epoch": 122.02163833075734, "percentage": 98.84, "elapsed_time": "4:49:20", "remaining_time": "0:03:24", "throughput": 3477.82, "total_tokens": 60378128}
8105
+ {"current_steps": 39540, "total_steps": 40000, "loss": 0.0006, "lr": 9.83094874865642e-05, "epoch": 122.0370942812983, "percentage": 98.85, "elapsed_time": "4:49:22", "remaining_time": "0:03:21", "throughput": 3477.85, "total_tokens": 60385456}
8106
+ {"current_steps": 39545, "total_steps": 40000, "loss": 0.0009, "lr": 9.618875203241672e-05, "epoch": 122.05255023183926, "percentage": 98.86, "elapsed_time": "4:49:24", "remaining_time": "0:03:19", "throughput": 3477.87, "total_tokens": 60392816}
8107
+ {"current_steps": 39550, "total_steps": 40000, "loss": 0.0008, "lr": 9.409113362977561e-05, "epoch": 122.06800618238022, "percentage": 98.88, "elapsed_time": "4:49:26", "remaining_time": "0:03:17", "throughput": 3477.92, "total_tokens": 60400400}
8108
+ {"current_steps": 39555, "total_steps": 40000, "loss": 0.0009, "lr": 9.20166326020988e-05, "epoch": 122.08346213292117, "percentage": 98.89, "elapsed_time": "4:49:28", "remaining_time": "0:03:15", "throughput": 3477.95, "total_tokens": 60407824}
8109
+ {"current_steps": 39560, "total_steps": 40000, "loss": 0.0007, "lr": 8.996524926933035e-05, "epoch": 122.09891808346214, "percentage": 98.9, "elapsed_time": "4:49:30", "remaining_time": "0:03:13", "throughput": 3477.99, "total_tokens": 60415376}
8110
+ {"current_steps": 39565, "total_steps": 40000, "loss": 0.0007, "lr": 8.793698394781723e-05, "epoch": 122.11437403400309, "percentage": 98.91, "elapsed_time": "4:49:32", "remaining_time": "0:03:11", "throughput": 3478.01, "total_tokens": 60422608}
8111
+ {"current_steps": 39570, "total_steps": 40000, "loss": 0.0007, "lr": 8.593183695030926e-05, "epoch": 122.12982998454405, "percentage": 98.92, "elapsed_time": "4:49:34", "remaining_time": "0:03:08", "throughput": 3478.08, "total_tokens": 60430768}
8112
+ {"current_steps": 39575, "total_steps": 40000, "loss": 0.0007, "lr": 8.39498085860757e-05, "epoch": 122.14528593508501, "percentage": 98.94, "elapsed_time": "4:49:36", "remaining_time": "0:03:06", "throughput": 3478.1, "total_tokens": 60437936}
8113
+ {"current_steps": 39580, "total_steps": 40000, "loss": 0.0007, "lr": 8.199089916072211e-05, "epoch": 122.16074188562597, "percentage": 98.95, "elapsed_time": "4:49:38", "remaining_time": "0:03:04", "throughput": 3478.15, "total_tokens": 60445552}
8114
+ {"current_steps": 39585, "total_steps": 40000, "loss": 0.0009, "lr": 8.005510897637346e-05, "epoch": 122.17619783616692, "percentage": 98.96, "elapsed_time": "4:49:40", "remaining_time": "0:03:02", "throughput": 3478.23, "total_tokens": 60453840}
8115
+ {"current_steps": 39590, "total_steps": 40000, "loss": 0.0012, "lr": 7.8142438331541e-05, "epoch": 122.19165378670789, "percentage": 98.98, "elapsed_time": "4:49:42", "remaining_time": "0:03:00", "throughput": 3478.26, "total_tokens": 60461296}
8116
+ {"current_steps": 39595, "total_steps": 40000, "loss": 0.0007, "lr": 7.625288752117209e-05, "epoch": 122.20710973724884, "percentage": 98.99, "elapsed_time": "4:49:44", "remaining_time": "0:02:57", "throughput": 3478.3, "total_tokens": 60468912}
8117
+ {"current_steps": 39600, "total_steps": 40000, "loss": 0.0008, "lr": 7.4386456836667e-05, "epoch": 122.2225656877898, "percentage": 99.0, "elapsed_time": "4:49:46", "remaining_time": "0:02:55", "throughput": 3478.35, "total_tokens": 60476592}
8118
+ {"current_steps": 39600, "total_steps": 40000, "eval_loss": 1.4520829916000366, "epoch": 122.2225656877898, "percentage": 99.0, "elapsed_time": "4:49:53", "remaining_time": "0:02:55", "throughput": 3476.94, "total_tokens": 60476592}