Music-paligemma-3b / trainer_state.json

Upload folder using huggingface_hub

e18bdd1 verified 4 months ago

7.55 kB

	{
	"best_metric": null,
	"best_model_checkpoint": null,
	"epoch": 0.6666666666666666,
	"eval_steps": 500,
	"global_step": 4000,
	"is_hyper_param_search": false,
	"is_local_process_zero": true,
	"is_world_process_zero": true,
	"log_history": [
	{
	"epoch": 0.016666666666666666,
	"grad_norm": 2.8894264698028564,
	"learning_rate": 1.9836639439906652e-05,
	"loss": 3.963,
	"step": 100
	},
	{
	"epoch": 0.03333333333333333,
	"grad_norm": 1.6018348932266235,
	"learning_rate": 1.9669944990831806e-05,
	"loss": 1.6373,
	"step": 200
	},
	{
	"epoch": 0.05,
	"grad_norm": 1.5761862993240356,
	"learning_rate": 1.950325054175696e-05,
	"loss": 1.0775,
	"step": 300
	},
	{
	"epoch": 0.06666666666666667,
	"grad_norm": 2.316917657852173,
	"learning_rate": 1.9336556092682115e-05,
	"loss": 0.9137,
	"step": 400
	},
	{
	"epoch": 0.08333333333333333,
	"grad_norm": 1.1824716329574585,
	"learning_rate": 1.916986164360727e-05,
	"loss": 0.905,
	"step": 500
	},
	{
	"epoch": 0.1,
	"grad_norm": 1.3308736085891724,
	"learning_rate": 1.9003167194532424e-05,
	"loss": 0.8585,
	"step": 600
	},
	{
	"epoch": 0.11666666666666667,
	"grad_norm": 2.580430030822754,
	"learning_rate": 1.8836472745457578e-05,
	"loss": 0.8369,
	"step": 700
	},
	{
	"epoch": 0.13333333333333333,
	"grad_norm": 1.272025465965271,
	"learning_rate": 1.8669778296382732e-05,
	"loss": 0.8541,
	"step": 800
	},
	{
	"epoch": 0.15,
	"grad_norm": 0.856234610080719,
	"learning_rate": 1.8503083847307887e-05,
	"loss": 0.8249,
	"step": 900
	},
	{
	"epoch": 0.16666666666666666,
	"grad_norm": 3.3791863918304443,
	"learning_rate": 1.833638939823304e-05,
	"loss": 0.7451,
	"step": 1000
	},
	{
	"epoch": 0.18333333333333332,
	"grad_norm": 2.909228801727295,
	"learning_rate": 1.8169694949158195e-05,
	"loss": 0.764,
	"step": 1100
	},
	{
	"epoch": 0.2,
	"grad_norm": 4.349322319030762,
	"learning_rate": 1.800300050008335e-05,
	"loss": 0.7804,
	"step": 1200
	},
	{
	"epoch": 0.21666666666666667,
	"grad_norm": 2.2106833457946777,
	"learning_rate": 1.7836306051008504e-05,
	"loss": 0.6879,
	"step": 1300
	},
	{
	"epoch": 0.23333333333333334,
	"grad_norm": 1.6364814043045044,
	"learning_rate": 1.7669611601933655e-05,
	"loss": 0.7412,
	"step": 1400
	},
	{
	"epoch": 0.25,
	"grad_norm": 1.1314855813980103,
	"learning_rate": 1.7502917152858813e-05,
	"loss": 0.7475,
	"step": 1500
	},
	{
	"epoch": 0.26666666666666666,
	"grad_norm": 1.854612112045288,
	"learning_rate": 1.7336222703783964e-05,
	"loss": 0.7417,
	"step": 1600
	},
	{
	"epoch": 0.2833333333333333,
	"grad_norm": 1.044703722000122,
	"learning_rate": 1.716952825470912e-05,
	"loss": 0.7145,
	"step": 1700
	},
	{
	"epoch": 0.3,
	"grad_norm": 2.3508412837982178,
	"learning_rate": 1.7002833805634272e-05,
	"loss": 0.6941,
	"step": 1800
	},
	{
	"epoch": 0.31666666666666665,
	"grad_norm": 1.4268666505813599,
	"learning_rate": 1.683613935655943e-05,
	"loss": 0.6445,
	"step": 1900
	},
	{
	"epoch": 0.3333333333333333,
	"grad_norm": 3.743910312652588,
	"learning_rate": 1.666944490748458e-05,
	"loss": 0.6664,
	"step": 2000
	},
	{
	"epoch": 0.35,
	"grad_norm": 3.6548993587493896,
	"learning_rate": 1.6502750458409736e-05,
	"loss": 0.6595,
	"step": 2100
	},
	{
	"epoch": 0.36666666666666664,
	"grad_norm": 2.295973777770996,
	"learning_rate": 1.633605600933489e-05,
	"loss": 0.7187,
	"step": 2200
	},
	{
	"epoch": 0.38333333333333336,
	"grad_norm": 2.941169500350952,
	"learning_rate": 1.6169361560260044e-05,
	"loss": 0.6929,
	"step": 2300
	},
	{
	"epoch": 0.4,
	"grad_norm": 1.1575044393539429,
	"learning_rate": 1.60026671111852e-05,
	"loss": 0.672,
	"step": 2400
	},
	{
	"epoch": 0.4166666666666667,
	"grad_norm": 5.224921703338623,
	"learning_rate": 1.5835972662110353e-05,
	"loss": 0.6365,
	"step": 2500
	},
	{
	"epoch": 0.43333333333333335,
	"grad_norm": 2.8850746154785156,
	"learning_rate": 1.5669278213035507e-05,
	"loss": 0.6301,
	"step": 2600
	},
	{
	"epoch": 0.45,
	"grad_norm": 0.9255233407020569,
	"learning_rate": 1.550258376396066e-05,
	"loss": 0.6064,
	"step": 2700
	},
	{
	"epoch": 0.4666666666666667,
	"grad_norm": 2.414670467376709,
	"learning_rate": 1.5335889314885816e-05,
	"loss": 0.5662,
	"step": 2800
	},
	{
	"epoch": 0.48333333333333334,
	"grad_norm": 2.8756697177886963,
	"learning_rate": 1.516919486581097e-05,
	"loss": 0.6112,
	"step": 2900
	},
	{
	"epoch": 0.5,
	"grad_norm": 5.171830177307129,
	"learning_rate": 1.5002500416736123e-05,
	"loss": 0.6093,
	"step": 3000
	},
	{
	"epoch": 0.5166666666666667,
	"grad_norm": 4.4593729972839355,
	"learning_rate": 1.4835805967661279e-05,
	"loss": 0.5817,
	"step": 3100
	},
	{
	"epoch": 0.5333333333333333,
	"grad_norm": 1.6261086463928223,
	"learning_rate": 1.4669111518586432e-05,
	"loss": 0.6441,
	"step": 3200
	},
	{
	"epoch": 0.55,
	"grad_norm": 7.9956159591674805,
	"learning_rate": 1.4502417069511588e-05,
	"loss": 0.5863,
	"step": 3300
	},
	{
	"epoch": 0.5666666666666667,
	"grad_norm": 5.350605487823486,
	"learning_rate": 1.433572262043674e-05,
	"loss": 0.5661,
	"step": 3400
	},
	{
	"epoch": 0.5833333333333334,
	"grad_norm": 2.4754750728607178,
	"learning_rate": 1.4169028171361893e-05,
	"loss": 0.5792,
	"step": 3500
	},
	{
	"epoch": 0.6,
	"grad_norm": 6.824994087219238,
	"learning_rate": 1.400233372228705e-05,
	"loss": 0.5871,
	"step": 3600
	},
	{
	"epoch": 0.6166666666666667,
	"grad_norm": 5.394326686859131,
	"learning_rate": 1.3835639273212202e-05,
	"loss": 0.5672,
	"step": 3700
	},
	{
	"epoch": 0.6333333333333333,
	"grad_norm": 3.3266642093658447,
	"learning_rate": 1.3668944824137358e-05,
	"loss": 0.5494,
	"step": 3800
	},
	{
	"epoch": 0.65,
	"grad_norm": 3.2017102241516113,
	"learning_rate": 1.350225037506251e-05,
	"loss": 0.5389,
	"step": 3900
	},
	{
	"epoch": 0.6666666666666666,
	"grad_norm": 1.5428489446640015,
	"learning_rate": 1.3335555925987667e-05,
	"loss": 0.5491,
	"step": 4000
	}
	],
	"logging_steps": 100,
	"max_steps": 12000,
	"num_input_tokens_seen": 0,
	"num_train_epochs": 2,
	"save_steps": 500,
	"stateful_callbacks": {
	"TrainerControl": {
	"args": {
	"should_epoch_stop": false,
	"should_evaluate": false,
	"should_log": false,
	"should_save": true,
	"should_training_stop": false
	},
	"attributes": {}
	}
	},
	"total_flos": 1.0297102208608502e+18,
	"train_batch_size": 2,
	"trial_name": null,
	"trial_params": null
	}