Commit History

best (Trained with Unsloth)
2071a0a
verified

avsolatorio committed on

{"epoch": 20.0, "global_step": 1740, "max_steps": 1740, "logging_steps": 50, "eval_steps": 50, "save_steps": 50, "train_batch_size": 2, "num_train_epochs": 20, "num_input_tokens_seen": 0, "total_flos": 3.209274775830528e+17, "log_history": [{"loss": 1.4833, "grad_norm": 0.6487900614738464, "learning_rate": 6.968641114982578e-07, "epoch": 19.54022988505747, "step": 1700}, {"eval_loss": 1.4727907180786133, "eval_runtime": 1.1058, "eval_samples_per_second": 8.139, "eval_steps_per_second": 2.713, "epoch": 19.54022988505747, "step": 1700}], "best_metric": 1.4655534029006958, "best_model_checkpoint": "./pf-data-use-unsloth-phi-3.5-simpleschema-thinking-prwp-manual-914-train-20epochs-1738770532/checkpoint-1250", "is_local_process_zero": true, "is_world_process_zero": true, "is_hyper_param_search": false, "trial_name": null, "trial_params": null, "stateful_callbacks": {"TrainerControl": {"args": {"should_training_stop": true, "should_epoch_stop": false, "should_save": true, "should_evaluate": false, "should_log": false}, "attributes": {}}}} (Trained with Unsloth)
3306171
verified

avsolatorio committed on

{"epoch": 19.0, "global_step": 1653, "max_steps": 1740, "logging_steps": 50, "eval_steps": 50, "save_steps": 50, "train_batch_size": 2, "num_train_epochs": 20, "num_input_tokens_seen": 0, "total_flos": 3.04327780466688e+17, "log_history": [{"loss": 1.4461, "grad_norm": 0.629298746585846, "learning_rate": 2.4390243902439023e-06, "epoch": 18.39080459770115, "step": 1600}, {"eval_loss": 1.472578525543213, "eval_runtime": 1.1085, "eval_samples_per_second": 8.119, "eval_steps_per_second": 2.706, "epoch": 18.39080459770115, "step": 1600}, {"loss": 1.4669, "grad_norm": 0.6036643385887146, "learning_rate": 1.5679442508710803e-06, "epoch": 18.96551724137931, "step": 1650}, {"eval_loss": 1.4724897146224976, "eval_runtime": 1.0953, "eval_samples_per_second": 8.217, "eval_steps_per_second": 2.739, "epoch": 18.96551724137931, "step": 1650}], "best_metric": 1.4655534029006958, "best_model_checkpoint": "./pf-data-use-unsloth-phi-3.5-simpleschema-thinking-prwp-manual-914-train-20epochs-1738770532/checkpoint-1250", "is_local_process_zero": true, "is_world_process_zero": true, "is_hyper_param_search": false, "trial_name": null, "trial_params": null, "stateful_callbacks": {"TrainerControl": {"args": {"should_training_stop": false, "should_epoch_stop": false, "should_save": true, "should_evaluate": false, "should_log": false}, "attributes": {}}}} (Trained with Unsloth)
4d27389
verified

avsolatorio committed on

{"epoch": 18.0, "global_step": 1566, "max_steps": 1740, "logging_steps": 50, "eval_steps": 50, "save_steps": 50, "train_batch_size": 2, "num_train_epochs": 20, "num_input_tokens_seen": 0, "total_flos": 2.85883672559616e+17, "log_history": [{"loss": 1.4601, "grad_norm": 0.5961132645606995, "learning_rate": 4.181184668989547e-06, "epoch": 17.24137931034483, "step": 1500}, {"eval_loss": 1.4710330963134766, "eval_runtime": 1.1073, "eval_samples_per_second": 8.128, "eval_steps_per_second": 2.709, "epoch": 17.24137931034483, "step": 1500}, {"loss": 1.4883, "grad_norm": 0.5646959543228149, "learning_rate": 3.3101045296167248e-06, "epoch": 17.816091954022987, "step": 1550}, {"eval_loss": 1.4707540273666382, "eval_runtime": 1.0942, "eval_samples_per_second": 8.225, "eval_steps_per_second": 2.742, "epoch": 17.816091954022987, "step": 1550}], "best_metric": 1.4655534029006958, "best_model_checkpoint": "./pf-data-use-unsloth-phi-3.5-simpleschema-thinking-prwp-manual-914-train-20epochs-1738770532/checkpoint-1250", "is_local_process_zero": true, "is_world_process_zero": true, "is_hyper_param_search": false, "trial_name": null, "trial_params": null, "stateful_callbacks": {"TrainerControl": {"args": {"should_training_stop": false, "should_epoch_stop": false, "should_save": true, "should_evaluate": false, "should_log": false}, "attributes": {}}}} (Trained with Unsloth)
c5b2cec
verified

avsolatorio committed on

{"epoch": 17.0, "global_step": 1479, "max_steps": 1740, "logging_steps": 50, "eval_steps": 50, "save_steps": 50, "train_batch_size": 2, "num_train_epochs": 20, "num_input_tokens_seen": 0, "total_flos": 2.67439564652544e+17, "log_history": [{"loss": 1.4858, "grad_norm": 0.5270890593528748, "learning_rate": 5.923344947735192e-06, "epoch": 16.091954022988507, "step": 1400}, {"eval_loss": 1.4679118394851685, "eval_runtime": 1.1048, "eval_samples_per_second": 8.147, "eval_steps_per_second": 2.716, "epoch": 16.091954022988507, "step": 1400}, {"loss": 1.4837, "grad_norm": 0.6481783390045166, "learning_rate": 5.052264808362369e-06, "epoch": 16.666666666666668, "step": 1450}, {"eval_loss": 1.470719814300537, "eval_runtime": 1.0923, "eval_samples_per_second": 8.24, "eval_steps_per_second": 2.747, "epoch": 16.666666666666668, "step": 1450}], "best_metric": 1.4655534029006958, "best_model_checkpoint": "./pf-data-use-unsloth-phi-3.5-simpleschema-thinking-prwp-manual-914-train-20epochs-1738770532/checkpoint-1250", "is_local_process_zero": true, "is_world_process_zero": true, "is_hyper_param_search": false, "trial_name": null, "trial_params": null, "stateful_callbacks": {"TrainerControl": {"args": {"should_training_stop": false, "should_epoch_stop": false, "should_save": true, "should_evaluate": false, "should_log": false}, "attributes": {}}}} (Trained with Unsloth)
006df89
verified

avsolatorio committed on

{"epoch": 16.0, "global_step": 1392, "max_steps": 1740, "logging_steps": 50, "eval_steps": 50, "save_steps": 50, "train_batch_size": 2, "num_train_epochs": 20, "num_input_tokens_seen": 0, "total_flos": 2.48995456745472e+17, "log_history": [{"loss": 1.4924, "grad_norm": 0.5719926953315735, "learning_rate": 6.794425087108014e-06, "epoch": 15.517241379310345, "step": 1350}, {"eval_loss": 1.4676774740219116, "eval_runtime": 1.1109, "eval_samples_per_second": 8.102, "eval_steps_per_second": 2.701, "epoch": 15.517241379310345, "step": 1350}], "best_metric": 1.4655534029006958, "best_model_checkpoint": "./pf-data-use-unsloth-phi-3.5-simpleschema-thinking-prwp-manual-914-train-20epochs-1738770532/checkpoint-1250", "is_local_process_zero": true, "is_world_process_zero": true, "is_hyper_param_search": false, "trial_name": null, "trial_params": null, "stateful_callbacks": {"TrainerControl": {"args": {"should_training_stop": false, "should_epoch_stop": false, "should_save": true, "should_evaluate": false, "should_log": false}, "attributes": {}}}} (Trained with Unsloth)
2339a36
verified

avsolatorio committed on

{"epoch": 15.0, "global_step": 1305, "max_steps": 1740, "logging_steps": 50, "eval_steps": 50, "save_steps": 50, "train_batch_size": 2, "num_train_epochs": 20, "num_input_tokens_seen": 0, "total_flos": 2.39773402791936e+17, "log_history": [{"loss": 1.4896, "grad_norm": 0.5859385132789612, "learning_rate": 8.536585365853658e-06, "epoch": 14.367816091954023, "step": 1250}, {"eval_loss": 1.4655534029006958, "eval_runtime": 1.1078, "eval_samples_per_second": 8.124, "eval_steps_per_second": 2.708, "epoch": 14.367816091954023, "step": 1250}, {"loss": 1.4978, "grad_norm": 0.5861697196960449, "learning_rate": 7.665505226480837e-06, "epoch": 14.942528735632184, "step": 1300}, {"eval_loss": 1.4668822288513184, "eval_runtime": 1.0938, "eval_samples_per_second": 8.229, "eval_steps_per_second": 2.743, "epoch": 14.942528735632184, "step": 1300}], "best_metric": 1.4655534029006958, "best_model_checkpoint": "./pf-data-use-unsloth-phi-3.5-simpleschema-thinking-prwp-manual-914-train-20epochs-1738770532/checkpoint-1250", "is_local_process_zero": true, "is_world_process_zero": true, "is_hyper_param_search": false, "trial_name": null, "trial_params": null, "stateful_callbacks": {"TrainerControl": {"args": {"should_training_stop": false, "should_epoch_stop": false, "should_save": true, "should_evaluate": false, "should_log": false}, "attributes": {}}}} (Trained with Unsloth)
8698572
verified

avsolatorio committed on

{"epoch": 14.0, "global_step": 1218, "max_steps": 1740, "logging_steps": 50, "eval_steps": 50, "save_steps": 50, "train_batch_size": 2, "num_train_epochs": 20, "num_input_tokens_seen": 0, "total_flos": 2.21329294884864e+17, "log_history": [{"loss": 1.5258, "grad_norm": 0.5444657802581787, "learning_rate": 1.0278745644599303e-05, "epoch": 13.218390804597702, "step": 1150}, {"eval_loss": 1.4695876836776733, "eval_runtime": 1.8225, "eval_samples_per_second": 4.938, "eval_steps_per_second": 1.646, "epoch": 13.218390804597702, "step": 1150}, {"loss": 1.522, "grad_norm": 0.574558436870575, "learning_rate": 9.40766550522648e-06, "epoch": 13.793103448275861, "step": 1200}, {"eval_loss": 1.4667538404464722, "eval_runtime": 1.0909, "eval_samples_per_second": 8.25, "eval_steps_per_second": 2.75, "epoch": 13.793103448275861, "step": 1200}], "best_metric": 1.4667538404464722, "best_model_checkpoint": "./pf-data-use-unsloth-phi-3.5-simpleschema-thinking-prwp-manual-914-train-20epochs-1738770532/checkpoint-1200", "is_local_process_zero": true, "is_world_process_zero": true, "is_hyper_param_search": false, "trial_name": null, "trial_params": null, "stateful_callbacks": {"TrainerControl": {"args": {"should_training_stop": false, "should_epoch_stop": false, "should_save": true, "should_evaluate": false, "should_log": false}, "attributes": {}}}} (Trained with Unsloth)
80471bf
verified

avsolatorio committed on

{"epoch": 13.0, "global_step": 1131, "max_steps": 1740, "logging_steps": 50, "eval_steps": 50, "save_steps": 50, "train_batch_size": 2, "num_train_epochs": 20, "num_input_tokens_seen": 0, "total_flos": 2.02885186977792e+17, "log_history": [{"loss": 1.5527, "grad_norm": 0.7230589985847473, "learning_rate": 1.2020905923344948e-05, "epoch": 12.068965517241379, "step": 1050}, {"eval_loss": 1.467178463935852, "eval_runtime": 1.8278, "eval_samples_per_second": 4.924, "eval_steps_per_second": 1.641, "epoch": 12.068965517241379, "step": 1050}, {"loss": 1.5039, "grad_norm": 0.5226315855979919, "learning_rate": 1.1149825783972125e-05, "epoch": 12.64367816091954, "step": 1100}, {"eval_loss": 1.468620777130127, "eval_runtime": 1.8123, "eval_samples_per_second": 4.966, "eval_steps_per_second": 1.655, "epoch": 12.64367816091954, "step": 1100}], "best_metric": 1.467178463935852, "best_model_checkpoint": "./pf-data-use-unsloth-phi-3.5-simpleschema-thinking-prwp-manual-914-train-20epochs-1738770532/checkpoint-1050", "is_local_process_zero": true, "is_world_process_zero": true, "is_hyper_param_search": false, "trial_name": null, "trial_params": null, "stateful_callbacks": {"TrainerControl": {"args": {"should_training_stop": false, "should_epoch_stop": false, "should_save": true, "should_evaluate": false, "should_log": false}, "attributes": {}}}} (Trained with Unsloth)
55d455a
verified

avsolatorio committed on

{"epoch": 12.0, "global_step": 1044, "max_steps": 1740, "logging_steps": 50, "eval_steps": 50, "save_steps": 50, "train_batch_size": 2, "num_train_epochs": 20, "num_input_tokens_seen": 0, "total_flos": 1.8444107907072e+17, "log_history": [{"loss": 1.5572, "grad_norm": 0.5147708058357239, "learning_rate": 1.2891986062717772e-05, "epoch": 11.494252873563218, "step": 1000}, {"eval_loss": 1.4700002670288086, "eval_runtime": 1.1086, "eval_samples_per_second": 8.119, "eval_steps_per_second": 2.706, "epoch": 11.494252873563218, "step": 1000}], "best_metric": 1.4700002670288086, "best_model_checkpoint": "./pf-data-use-unsloth-phi-3.5-simpleschema-thinking-prwp-manual-914-train-20epochs-1738770532/checkpoint-1000", "is_local_process_zero": true, "is_world_process_zero": true, "is_hyper_param_search": false, "trial_name": null, "trial_params": null, "stateful_callbacks": {"TrainerControl": {"args": {"should_training_stop": false, "should_epoch_stop": false, "should_save": true, "should_evaluate": false, "should_log": false}, "attributes": {}}}} (Trained with Unsloth)
32fbb94
verified

avsolatorio committed on

{"epoch": 11.0, "global_step": 957, "max_steps": 1740, "logging_steps": 50, "eval_steps": 50, "save_steps": 50, "train_batch_size": 2, "num_train_epochs": 20, "num_input_tokens_seen": 0, "total_flos": 1.75219025117184e+17, "log_history": [{"loss": 1.5879, "grad_norm": 0.4060925841331482, "learning_rate": 1.4634146341463415e-05, "epoch": 10.344827586206897, "step": 900}, {"eval_loss": 1.4765105247497559, "eval_runtime": 1.1075, "eval_samples_per_second": 8.126, "eval_steps_per_second": 2.709, "epoch": 10.344827586206897, "step": 900}, {"loss": 1.5554, "grad_norm": 0.4849716126918793, "learning_rate": 1.3763066202090593e-05, "epoch": 10.919540229885058, "step": 950}, {"eval_loss": 1.470994472503662, "eval_runtime": 1.0945, "eval_samples_per_second": 8.223, "eval_steps_per_second": 2.741, "epoch": 10.919540229885058, "step": 950}], "best_metric": 1.470994472503662, "best_model_checkpoint": "./pf-data-use-unsloth-phi-3.5-simpleschema-thinking-prwp-manual-914-train-20epochs-1738770532/checkpoint-950", "is_local_process_zero": true, "is_world_process_zero": true, "is_hyper_param_search": false, "trial_name": null, "trial_params": null, "stateful_callbacks": {"TrainerControl": {"args": {"should_training_stop": false, "should_epoch_stop": false, "should_save": true, "should_evaluate": false, "should_log": false}, "attributes": {}}}} (Trained with Unsloth)
d7aa06b
verified

avsolatorio committed on

{"epoch": 10.0, "global_step": 870, "max_steps": 1740, "logging_steps": 50, "eval_steps": 50, "save_steps": 50, "train_batch_size": 2, "num_train_epochs": 20, "num_input_tokens_seen": 0, "total_flos": 1.56774917210112e+17, "log_history": [{"loss": 1.6116, "grad_norm": 0.4694165587425232, "learning_rate": 1.6376306620209058e-05, "epoch": 9.195402298850574, "step": 800}, {"eval_loss": 1.485896110534668, "eval_runtime": 1.1024, "eval_samples_per_second": 8.164, "eval_steps_per_second": 2.721, "epoch": 9.195402298850574, "step": 800}, {"loss": 1.5946, "grad_norm": 0.4846917688846588, "learning_rate": 1.5505226480836236e-05, "epoch": 9.770114942528735, "step": 850}, {"eval_loss": 1.4794578552246094, "eval_runtime": 1.094, "eval_samples_per_second": 8.226, "eval_steps_per_second": 2.742, "epoch": 9.770114942528735, "step": 850}], "best_metric": 1.4794578552246094, "best_model_checkpoint": "./pf-data-use-unsloth-phi-3.5-simpleschema-thinking-prwp-manual-914-train-20epochs-1738770532/checkpoint-850", "is_local_process_zero": true, "is_world_process_zero": true, "is_hyper_param_search": false, "trial_name": null, "trial_params": null, "stateful_callbacks": {"TrainerControl": {"args": {"should_training_stop": false, "should_epoch_stop": false, "should_save": true, "should_evaluate": false, "should_log": false}, "attributes": {}}}} (Trained with Unsloth)
6de48e3
verified

avsolatorio committed on

{"epoch": 9.0, "global_step": 783, "max_steps": 1740, "logging_steps": 50, "eval_steps": 50, "save_steps": 50, "train_batch_size": 2, "num_train_epochs": 20, "num_input_tokens_seen": 0, "total_flos": 1.3833080930304e+17, "log_history": [{"loss": 1.6361, "grad_norm": 0.462488055229187, "learning_rate": 1.8118466898954705e-05, "epoch": 8.045977011494253, "step": 700}, {"eval_loss": 1.501307487487793, "eval_runtime": 1.1013, "eval_samples_per_second": 8.172, "eval_steps_per_second": 2.724, "epoch": 8.045977011494253, "step": 700}, {"loss": 1.6245, "grad_norm": 0.46570727229118347, "learning_rate": 1.7247386759581883e-05, "epoch": 8.620689655172415, "step": 750}, {"eval_loss": 1.4913685321807861, "eval_runtime": 1.0929, "eval_samples_per_second": 8.235, "eval_steps_per_second": 2.745, "epoch": 8.620689655172415, "step": 750}], "best_metric": 1.4913685321807861, "best_model_checkpoint": "./pf-data-use-unsloth-phi-3.5-simpleschema-thinking-prwp-manual-914-train-20epochs-1738770532/checkpoint-750", "is_local_process_zero": true, "is_world_process_zero": true, "is_hyper_param_search": false, "trial_name": null, "trial_params": null, "stateful_callbacks": {"TrainerControl": {"args": {"should_training_stop": false, "should_epoch_stop": false, "should_save": true, "should_evaluate": false, "should_log": false}, "attributes": {}}}} (Trained with Unsloth)
03bc8e3
verified

avsolatorio committed on

{"epoch": 8.0, "global_step": 696, "max_steps": 1740, "logging_steps": 50, "eval_steps": 50, "save_steps": 50, "train_batch_size": 2, "num_train_epochs": 20, "num_input_tokens_seen": 0, "total_flos": 1.19886701395968e+17, "log_history": [{"loss": 1.6842, "grad_norm": 0.5990384817123413, "learning_rate": 1.8989547038327526e-05, "epoch": 7.471264367816092, "step": 650}, {"eval_loss": 1.51298987865448, "eval_runtime": 1.1075, "eval_samples_per_second": 8.126, "eval_steps_per_second": 2.709, "epoch": 7.471264367816092, "step": 650}], "best_metric": 1.51298987865448, "best_model_checkpoint": "./pf-data-use-unsloth-phi-3.5-simpleschema-thinking-prwp-manual-914-train-20epochs-1738770532/checkpoint-650", "is_local_process_zero": true, "is_world_process_zero": true, "is_hyper_param_search": false, "trial_name": null, "trial_params": null, "stateful_callbacks": {"TrainerControl": {"args": {"should_training_stop": false, "should_epoch_stop": false, "should_save": true, "should_evaluate": false, "should_log": false}, "attributes": {}}}} (Trained with Unsloth)
936b7b3
verified

avsolatorio committed on

{"epoch": 7.0, "global_step": 609, "max_steps": 1740, "logging_steps": 50, "eval_steps": 50, "save_steps": 50, "train_batch_size": 2, "num_train_epochs": 20, "num_input_tokens_seen": 0, "total_flos": 1.10664647442432e+17, "log_history": [{"loss": 1.7119, "grad_norm": 0.4593299925327301, "learning_rate": 2.073170731707317e-05, "epoch": 6.32183908045977, "step": 550}, {"eval_loss": 1.5443768501281738, "eval_runtime": 1.1084, "eval_samples_per_second": 8.12, "eval_steps_per_second": 2.707, "epoch": 6.32183908045977, "step": 550}, {"loss": 1.7106, "grad_norm": 0.6204197406768799, "learning_rate": 1.9860627177700348e-05, "epoch": 6.896551724137931, "step": 600}, {"eval_loss": 1.5257289409637451, "eval_runtime": 1.0947, "eval_samples_per_second": 8.222, "eval_steps_per_second": 2.741, "epoch": 6.896551724137931, "step": 600}], "best_metric": 1.5257289409637451, "best_model_checkpoint": "./pf-data-use-unsloth-phi-3.5-simpleschema-thinking-prwp-manual-914-train-20epochs-1738770532/checkpoint-600", "is_local_process_zero": true, "is_world_process_zero": true, "is_hyper_param_search": false, "trial_name": null, "trial_params": null, "stateful_callbacks": {"TrainerControl": {"args": {"should_training_stop": false, "should_epoch_stop": false, "should_save": true, "should_evaluate": false, "should_log": false}, "attributes": {}}}} (Trained with Unsloth)
0e6f5e4
verified

avsolatorio committed on

{"epoch": 6.0, "global_step": 522, "max_steps": 1740, "logging_steps": 50, "eval_steps": 50, "save_steps": 50, "train_batch_size": 2, "num_train_epochs": 20, "num_input_tokens_seen": 0, "total_flos": 9.222053953536e+16, "log_history": [{"loss": 1.7832, "grad_norm": 0.5402464866638184, "learning_rate": 2.2473867595818816e-05, "epoch": 5.172413793103448, "step": 450}, {"eval_loss": 1.594441294670105, "eval_runtime": 1.1046, "eval_samples_per_second": 8.147, "eval_steps_per_second": 2.716, "epoch": 5.172413793103448, "step": 450}, {"loss": 1.7576, "grad_norm": 0.5000119209289551, "learning_rate": 2.160278745644599e-05, "epoch": 5.747126436781609, "step": 500}, {"eval_loss": 1.5654484033584595, "eval_runtime": 1.092, "eval_samples_per_second": 8.242, "eval_steps_per_second": 2.747, "epoch": 5.747126436781609, "step": 500}], "best_metric": 1.5654484033584595, "best_model_checkpoint": "./pf-data-use-unsloth-phi-3.5-simpleschema-thinking-prwp-manual-914-train-20epochs-1738770532/checkpoint-500", "is_local_process_zero": true, "is_world_process_zero": true, "is_hyper_param_search": false, "trial_name": null, "trial_params": null, "stateful_callbacks": {"TrainerControl": {"args": {"should_training_stop": false, "should_epoch_stop": false, "should_save": true, "should_evaluate": false, "should_log": false}, "attributes": {}}}} (Trained with Unsloth)
ac97d10
verified

avsolatorio committed on

{"epoch": 5.0, "global_step": 435, "max_steps": 1740, "logging_steps": 50, "eval_steps": 50, "save_steps": 50, "train_batch_size": 2, "num_train_epochs": 20, "num_input_tokens_seen": 0, "total_flos": 7.3776431628288e+16, "log_history": [{"loss": 1.9683, "grad_norm": 0.7364537119865417, "learning_rate": 2.4216027874564463e-05, "epoch": 4.022988505747127, "step": 350}, {"eval_loss": 1.7096484899520874, "eval_runtime": 1.1015, "eval_samples_per_second": 8.171, "eval_steps_per_second": 2.724, "epoch": 4.022988505747127, "step": 350}, {"loss": 1.8601, "grad_norm": 0.7380545139312744, "learning_rate": 2.334494773519164e-05, "epoch": 4.597701149425287, "step": 400}, {"eval_loss": 1.6371030807495117, "eval_runtime": 1.0935, "eval_samples_per_second": 8.23, "eval_steps_per_second": 2.743, "epoch": 4.597701149425287, "step": 400}], "best_metric": 1.6371030807495117, "best_model_checkpoint": "./pf-data-use-unsloth-phi-3.5-simpleschema-thinking-prwp-manual-914-train-20epochs-1738770532/checkpoint-400", "is_local_process_zero": true, "is_world_process_zero": true, "is_hyper_param_search": false, "trial_name": null, "trial_params": null, "stateful_callbacks": {"TrainerControl": {"args": {"should_training_stop": false, "should_epoch_stop": false, "should_save": true, "should_evaluate": false, "should_log": false}, "attributes": {}}}} (Trained with Unsloth)
20eb589
verified

avsolatorio committed on

{"epoch": 4.0, "global_step": 348, "max_steps": 1740, "logging_steps": 50, "eval_steps": 50, "save_steps": 50, "train_batch_size": 2, "num_train_epochs": 20, "num_input_tokens_seen": 0, "total_flos": 5.5332323721216e+16, "log_history": [{"loss": 2.0595, "grad_norm": 0.5517488718032837, "learning_rate": 2.5087108013937284e-05, "epoch": 3.4482758620689653, "step": 300}, {"eval_loss": 1.8177926540374756, "eval_runtime": 1.1116, "eval_samples_per_second": 8.096, "eval_steps_per_second": 2.699, "epoch": 3.4482758620689653, "step": 300}], "best_metric": 1.8177926540374756, "best_model_checkpoint": "./pf-data-use-unsloth-phi-3.5-simpleschema-thinking-prwp-manual-914-train-20epochs-1738770532/checkpoint-300", "is_local_process_zero": true, "is_world_process_zero": true, "is_hyper_param_search": false, "trial_name": null, "trial_params": null, "stateful_callbacks": {"TrainerControl": {"args": {"should_training_stop": false, "should_epoch_stop": false, "should_save": true, "should_evaluate": false, "should_log": false}, "attributes": {}}}} (Trained with Unsloth)
6e6f5bd
verified

avsolatorio committed on

{"epoch": 3.0, "global_step": 261, "max_steps": 1740, "logging_steps": 50, "eval_steps": 50, "save_steps": 50, "train_batch_size": 2, "num_train_epochs": 20, "num_input_tokens_seen": 0, "total_flos": 4.611026976768e+16, "log_history": [{"loss": 2.6175, "grad_norm": 0.7418221831321716, "learning_rate": 2.6829268292682928e-05, "epoch": 2.2988505747126435, "step": 200}, {"eval_loss": 2.265676498413086, "eval_runtime": 1.1348, "eval_samples_per_second": 7.931, "eval_steps_per_second": 2.644, "epoch": 2.2988505747126435, "step": 200}, {"loss": 2.2472, "grad_norm": 0.7443752884864807, "learning_rate": 2.5958188153310106e-05, "epoch": 2.873563218390805, "step": 250}, {"eval_loss": 1.989217758178711, "eval_runtime": 1.0924, "eval_samples_per_second": 8.238, "eval_steps_per_second": 2.746, "epoch": 2.873563218390805, "step": 250}], "best_metric": 1.989217758178711, "best_model_checkpoint": "./pf-data-use-unsloth-phi-3.5-simpleschema-thinking-prwp-manual-914-train-20epochs-1738770532/checkpoint-250", "is_local_process_zero": true, "is_world_process_zero": true, "is_hyper_param_search": false, "trial_name": null, "trial_params": null, "stateful_callbacks": {"TrainerControl": {"args": {"should_training_stop": false, "should_epoch_stop": false, "should_save": true, "should_evaluate": false, "should_log": false}, "attributes": {}}}} (Trained with Unsloth)
7cf61d9
verified

avsolatorio committed on

{"epoch": 2.0, "global_step": 174, "max_steps": 1740, "logging_steps": 50, "eval_steps": 50, "save_steps": 50, "train_batch_size": 2, "num_train_epochs": 20, "num_input_tokens_seen": 0, "total_flos": 2.7666161860608e+16, "log_history": [{"loss": 3.518, "grad_norm": 0.633488118648529, "learning_rate": 2.857142857142857e-05, "epoch": 1.1494252873563218, "step": 100}, {"eval_loss": 3.1879305839538574, "eval_runtime": 1.0965, "eval_samples_per_second": 8.208, "eval_steps_per_second": 2.736, "epoch": 1.1494252873563218, "step": 100}, {"loss": 3.0634, "grad_norm": 0.6384217143058777, "learning_rate": 2.770034843205575e-05, "epoch": 1.7241379310344827, "step": 150}, {"eval_loss": 2.6936521530151367, "eval_runtime": 1.0919, "eval_samples_per_second": 8.243, "eval_steps_per_second": 2.748, "epoch": 1.7241379310344827, "step": 150}], "best_metric": 2.6936521530151367, "best_model_checkpoint": "./pf-data-use-unsloth-phi-3.5-simpleschema-thinking-prwp-manual-914-train-20epochs-1738770532/checkpoint-150", "is_local_process_zero": true, "is_world_process_zero": true, "is_hyper_param_search": false, "trial_name": null, "trial_params": null, "stateful_callbacks": {"TrainerControl": {"args": {"should_training_stop": false, "should_epoch_stop": false, "should_save": true, "should_evaluate": false, "should_log": false}, "attributes": {}}}} (Trained with Unsloth)
484d989
verified

avsolatorio committed on

{"epoch": 1.0, "global_step": 87, "max_steps": 1740, "logging_steps": 50, "eval_steps": 50, "save_steps": 50, "train_batch_size": 2, "num_train_epochs": 20, "num_input_tokens_seen": 0, "total_flos": 9222053953536000.0, "log_history": [{"loss": 4.3402, "grad_norm": 0.8734406232833862, "learning_rate": 2.9442508710801396e-05, "epoch": 0.5747126436781609, "step": 50}, {"eval_loss": 3.7241973876953125, "eval_runtime": 1.118, "eval_samples_per_second": 8.05, "eval_steps_per_second": 2.683, "epoch": 0.5747126436781609, "step": 50}], "best_metric": 3.7241973876953125, "best_model_checkpoint": "./pf-data-use-unsloth-phi-3.5-simpleschema-thinking-prwp-manual-914-train-20epochs-1738770532/checkpoint-50", "is_local_process_zero": true, "is_world_process_zero": true, "is_hyper_param_search": false, "trial_name": null, "trial_params": null, "stateful_callbacks": {"TrainerControl": {"args": {"should_training_stop": false, "should_epoch_stop": false, "should_save": true, "should_evaluate": false, "should_log": false}, "attributes": {}}}} (Trained with Unsloth)
5051da4
verified

avsolatorio committed on

{"epoch": 1.0, "global_step": 87, "max_steps": 1740, "logging_steps": 50, "eval_steps": 50, "save_steps": 50, "train_batch_size": 2, "num_train_epochs": 20, "num_input_tokens_seen": 0, "total_flos": 9222053953536000.0, "log_history": [{"loss": 4.3402, "grad_norm": 0.8734406232833862, "learning_rate": 2.9442508710801396e-05, "epoch": 0.5747126436781609, "step": 50}, {"eval_loss": 3.7241973876953125, "eval_runtime": 1.118, "eval_samples_per_second": 8.05, "eval_steps_per_second": 2.683, "epoch": 0.5747126436781609, "step": 50}], "best_metric": 3.7241973876953125, "best_model_checkpoint": "./pf-data-use-unsloth-phi-3.5-simpleschema-thinking-prwp-manual-914-train-20epochs-1738770532/checkpoint-50", "is_local_process_zero": true, "is_world_process_zero": true, "is_hyper_param_search": false, "trial_name": null, "trial_params": null, "stateful_callbacks": {"TrainerControl": {"args": {"should_training_stop": false, "should_epoch_stop": false, "should_save": true, "should_evaluate": false, "should_log": false}, "attributes": {}}}} (Trained with Unsloth)
0022915
verified

avsolatorio committed on

Upload README.md with huggingface_hub
a20929d
verified

avsolatorio committed on

initial commit
c2136be
verified

avsolatorio committed on