Instruments-16bit-3B-4Epoch / trainer_state.json
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 4.0,
"eval_steps": 500,
"global_step": 8240,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.12135922330097088,
"grad_norm": 0.9599943161010742,
"learning_rate": 9.698956057295461e-05,
"loss": 1.2196,
"step": 250
},
{
"epoch": 0.24271844660194175,
"grad_norm": 1.0285232067108154,
"learning_rate": 9.395484340859432e-05,
"loss": 0.6688,
"step": 500
},
{
"epoch": 0.3640776699029126,
"grad_norm": 0.8658091425895691,
"learning_rate": 9.092012624423404e-05,
"loss": 0.5603,
"step": 750
},
{
"epoch": 0.4854368932038835,
"grad_norm": 0.8770154714584351,
"learning_rate": 8.788540907987377e-05,
"loss": 0.5165,
"step": 1000
},
{
"epoch": 0.6067961165048543,
"grad_norm": 0.6812583208084106,
"learning_rate": 8.485069191551348e-05,
"loss": 0.5015,
"step": 1250
},
{
"epoch": 0.7281553398058253,
"grad_norm": 0.6911689043045044,
"learning_rate": 8.181597475115321e-05,
"loss": 0.4846,
"step": 1500
},
{
"epoch": 0.8495145631067961,
"grad_norm": 0.6556753516197205,
"learning_rate": 7.878125758679291e-05,
"loss": 0.4769,
"step": 1750
},
{
"epoch": 0.970873786407767,
"grad_norm": 0.5876182317733765,
"learning_rate": 7.574654042243264e-05,
"loss": 0.4701,
"step": 2000
},
{
"epoch": 1.0922330097087378,
"grad_norm": 0.6175569891929626,
"learning_rate": 7.271182325807235e-05,
"loss": 0.4616,
"step": 2250
},
{
"epoch": 1.2135922330097086,
"grad_norm": 0.6353004574775696,
"learning_rate": 6.967710609371208e-05,
"loss": 0.4518,
"step": 2500
},
{
"epoch": 1.3349514563106797,
"grad_norm": 0.5879459977149963,
"learning_rate": 6.664238892935178e-05,
"loss": 0.4483,
"step": 2750
},
{
"epoch": 1.4563106796116505,
"grad_norm": 0.6575189232826233,
"learning_rate": 6.360767176499151e-05,
"loss": 0.4367,
"step": 3000
},
{
"epoch": 1.5776699029126213,
"grad_norm": 0.724533200263977,
"learning_rate": 6.0572954600631224e-05,
"loss": 0.4226,
"step": 3250
},
{
"epoch": 1.6990291262135924,
"grad_norm": 0.7686433792114258,
"learning_rate": 5.7538237436270945e-05,
"loss": 0.4104,
"step": 3500
},
{
"epoch": 1.820388349514563,
"grad_norm": 0.7101556658744812,
"learning_rate": 5.450352027191066e-05,
"loss": 0.3954,
"step": 3750
},
{
"epoch": 1.941747572815534,
"grad_norm": 0.7856088280677795,
"learning_rate": 5.146880310755038e-05,
"loss": 0.3827,
"step": 4000
},
{
"epoch": 2.063106796116505,
"grad_norm": 0.8785816431045532,
"learning_rate": 4.84340859431901e-05,
"loss": 0.3585,
"step": 4250
},
{
"epoch": 2.1844660194174756,
"grad_norm": 0.858726441860199,
"learning_rate": 4.539936877882982e-05,
"loss": 0.341,
"step": 4500
},
{
"epoch": 2.3058252427184467,
"grad_norm": 0.8789017200469971,
"learning_rate": 4.236465161446954e-05,
"loss": 0.3313,
"step": 4750
},
{
"epoch": 2.4271844660194173,
"grad_norm": 0.9984813928604126,
"learning_rate": 3.932993445010925e-05,
"loss": 0.321,
"step": 5000
},
{
"epoch": 2.5485436893203883,
"grad_norm": 0.8649771213531494,
"learning_rate": 3.6295217285748975e-05,
"loss": 0.3104,
"step": 5250
},
{
"epoch": 2.6699029126213594,
"grad_norm": 0.9905620217323303,
"learning_rate": 3.326050012138869e-05,
"loss": 0.3008,
"step": 5500
},
{
"epoch": 2.79126213592233,
"grad_norm": 0.9460727572441101,
"learning_rate": 3.022578295702841e-05,
"loss": 0.2965,
"step": 5750
},
{
"epoch": 2.912621359223301,
"grad_norm": 0.8885589241981506,
"learning_rate": 2.7191065792668125e-05,
"loss": 0.2876,
"step": 6000
},
{
"epoch": 3.033980582524272,
"grad_norm": 0.9261214733123779,
"learning_rate": 2.4156348628307843e-05,
"loss": 0.2759,
"step": 6250
},
{
"epoch": 3.1553398058252426,
"grad_norm": 0.9241772294044495,
"learning_rate": 2.112163146394756e-05,
"loss": 0.2618,
"step": 6500
},
{
"epoch": 3.2766990291262137,
"grad_norm": 0.929602861404419,
"learning_rate": 1.808691429958728e-05,
"loss": 0.2578,
"step": 6750
},
{
"epoch": 3.3980582524271843,
"grad_norm": 0.9885833263397217,
"learning_rate": 1.5052197135226997e-05,
"loss": 0.2547,
"step": 7000
},
{
"epoch": 3.5194174757281553,
"grad_norm": 0.9474493861198425,
"learning_rate": 1.2017479970866715e-05,
"loss": 0.2528,
"step": 7250
},
{
"epoch": 3.6407766990291264,
"grad_norm": 0.9105657935142517,
"learning_rate": 8.982762806506435e-06,
"loss": 0.25,
"step": 7500
},
{
"epoch": 3.762135922330097,
"grad_norm": 0.9185407161712646,
"learning_rate": 5.948045642146152e-06,
"loss": 0.2463,
"step": 7750
},
{
"epoch": 3.883495145631068,
"grad_norm": 0.8763870000839233,
"learning_rate": 2.9133284777858704e-06,
"loss": 0.2462,
"step": 8000
},
{
"epoch": 4.0,
"step": 8240,
"total_flos": 1.1079720316327956e+18,
"train_loss": 0.39851856185394585,
"train_runtime": 97554.2615,
"train_samples_per_second": 5.406,
"train_steps_per_second": 0.084
}
],
"logging_steps": 250,
"max_steps": 8240,
"num_input_tokens_seen": 0,
"num_train_epochs": 4,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": false,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 1.1079720316327956e+18,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}
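
The JSON above is the raw state dumped by the Hugging Face Trainer; the training curves it records can be read straight out of "log_history". Below is a minimal sketch in Python, assuming the file is saved locally as "trainer_state.json" and that matplotlib is installed (neither assumption comes from the file itself).

import json

import matplotlib.pyplot as plt

# Load the trainer state written by transformers.Trainer.
with open("trainer_state.json") as f:
    state = json.load(f)

# Keep only the per-step logging entries; the final summary entry
# (train_loss, train_runtime, ...) carries no "loss" key.
logs = [entry for entry in state["log_history"] if "loss" in entry]

steps = [entry["step"] for entry in logs]
losses = [entry["loss"] for entry in logs]
lrs = [entry["learning_rate"] for entry in logs]

# Plot training loss and learning rate against the global step.
fig, loss_ax = plt.subplots()
loss_ax.plot(steps, losses, label="training loss")
loss_ax.set_xlabel("global step")
loss_ax.set_ylabel("loss")

lr_ax = loss_ax.twinx()
lr_ax.plot(steps, lrs, color="tab:orange", label="learning rate")
lr_ax.set_ylabel("learning rate")

fig.tight_layout()
plt.show()

Read this way, the logged loss falls from 1.22 at step 250 to about 0.25 by step 8000, while the learning rate decays roughly linearly from ~9.7e-05 toward zero over the 8240 steps of the 4-epoch run.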