ZZanty's picture
Agent training resumed from the checkpoint.
0391038 verified
raw
history blame
20 kB
{
"name": "root",
"gauges": {
"SoccerTwos.Policy.Entropy.mean": {
"value": 2.728860855102539,
"min": 2.607203960418701,
"max": 3.186396837234497,
"count": 237
},
"SoccerTwos.Policy.Entropy.sum": {
"value": 37549.125,
"min": 3432.833984375,
"max": 114215.890625,
"count": 237
},
"SoccerTwos.Environment.EpisodeLength.mean": {
"value": 999.0,
"min": 563.875,
"max": 999.0,
"count": 237
},
"SoccerTwos.Environment.EpisodeLength.sum": {
"value": 19980.0,
"min": 7992.0,
"max": 31968.0,
"count": 237
},
"SoccerTwos.Step.mean": {
"value": 4999572.0,
"min": 2619779.0,
"max": 4999572.0,
"count": 239
},
"SoccerTwos.Step.sum": {
"value": 4999572.0,
"min": 2619779.0,
"max": 4999572.0,
"count": 239
},
"SoccerTwos.Policy.ExtrinsicBaselineEstimate.mean": {
"value": -0.010554909706115723,
"min": -0.024652715772390366,
"max": 0.009026331827044487,
"count": 239
},
"SoccerTwos.Policy.ExtrinsicBaselineEstimate.sum": {
"value": -0.10554909706115723,
"min": -0.24652716517448425,
"max": 0.09026331454515457,
"count": 239
},
"SoccerTwos.Policy.ExtrinsicValueEstimate.mean": {
"value": -0.010584806092083454,
"min": -0.024528544396162033,
"max": 0.005459004081785679,
"count": 239
},
"SoccerTwos.Policy.ExtrinsicValueEstimate.sum": {
"value": -0.10584805905818939,
"min": -0.24528545141220093,
"max": 0.05459003895521164,
"count": 239
},
"SoccerTwos.Environment.CumulativeReward.mean": {
"value": 0.0,
"min": 0.0,
"max": 0.0,
"count": 239
},
"SoccerTwos.Environment.CumulativeReward.sum": {
"value": 0.0,
"min": 0.0,
"max": 0.0,
"count": 239
},
"SoccerTwos.Policy.ExtrinsicReward.mean": {
"value": 0.0,
"min": -0.3076923076923077,
"max": 0.1315636309710416,
"count": 239
},
"SoccerTwos.Policy.ExtrinsicReward.sum": {
"value": 0.0,
"min": -4.0,
"max": 1.4471999406814575,
"count": 239
},
"SoccerTwos.Environment.GroupCumulativeReward.mean": {
"value": 0.0,
"min": -0.3076923076923077,
"max": 0.1315636309710416,
"count": 239
},
"SoccerTwos.Environment.GroupCumulativeReward.sum": {
"value": 0.0,
"min": -4.0,
"max": 1.4471999406814575,
"count": 239
},
"SoccerTwos.IsTraining.mean": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 239
},
"SoccerTwos.IsTraining.sum": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 239
},
"SoccerTwos.Self-play.ELO.mean": {
"value": 1188.63912490485,
"min": 1188.296183058372,
"max": 1193.4099719653157,
"count": 33
},
"SoccerTwos.Self-play.ELO.sum": {
"value": 2377.2782498097,
"min": 2376.592366116744,
"max": 7156.471530987656,
"count": 33
},
"SoccerTwos.Losses.PolicyLoss.mean": {
"value": 0.017341362327958146,
"min": 0.010875945306179347,
"max": 0.02294719993757705,
"count": 108
},
"SoccerTwos.Losses.PolicyLoss.sum": {
"value": 0.017341362327958146,
"min": 0.010875945306179347,
"max": 0.02294719993757705,
"count": 108
},
"SoccerTwos.Losses.ValueLoss.mean": {
"value": 6.84104789350215e-07,
"min": 1.5518375806600205e-09,
"max": 0.002259943775910263,
"count": 108
},
"SoccerTwos.Losses.ValueLoss.sum": {
"value": 6.84104789350215e-07,
"min": 1.5518375806600205e-09,
"max": 0.002259943775910263,
"count": 108
},
"SoccerTwos.Losses.BaselineLoss.mean": {
"value": 6.881377158170684e-07,
"min": 1.9861120451973117e-09,
"max": 0.002269505410610388,
"count": 108
},
"SoccerTwos.Losses.BaselineLoss.sum": {
"value": 6.881377158170684e-07,
"min": 1.9861120451973117e-09,
"max": 0.002269505410610388,
"count": 108
},
"SoccerTwos.Policy.LearningRate.mean": {
"value": 0.0003,
"min": 0.0003,
"max": 0.0003,
"count": 108
},
"SoccerTwos.Policy.LearningRate.sum": {
"value": 0.0003,
"min": 0.0003,
"max": 0.0003,
"count": 108
},
"SoccerTwos.Policy.Epsilon.mean": {
"value": 0.20000000000000007,
"min": 0.19999999999999993,
"max": 0.20000000000000007,
"count": 108
},
"SoccerTwos.Policy.Epsilon.sum": {
"value": 0.20000000000000007,
"min": 0.19999999999999993,
"max": 0.20000000000000007,
"count": 108
},
"SoccerTwos.Policy.Beta.mean": {
"value": 0.005000000000000001,
"min": 0.005,
"max": 0.005000000000000001,
"count": 108
},
"SoccerTwos.Policy.Beta.sum": {
"value": 0.005000000000000001,
"min": 0.005,
"max": 0.005000000000000001,
"count": 108
}
},
"metadata": {
"timer_format_version": "0.1.0",
"start_time_seconds": "1734604203",
"python_version": "3.10.12 (main, Nov 6 2024, 20:22:13) [GCC 11.4.0]",
"command_line_arguments": "/usr/local/bin/mlagents-learn ./results/SoccerTwosNew/configuration.yaml --env=train-soccer/SoccerTwos/SoccerTwos.x86_64 --run-id=SoccerTwosNew --no-graphics --resume",
"mlagents_version": "1.2.0.dev0",
"mlagents_envs_version": "1.2.0.dev0",
"communication_protocol_version": "1.5.0",
"pytorch_version": "2.5.1+cu121",
"numpy_version": "1.23.5",
"end_time_seconds": "1734618449"
},
"total": 14245.571375721998,
"count": 1,
"self": 0.7815168619963515,
"children": {
"run_training.setup": {
"total": 0.1262984900004085,
"count": 1,
"self": 0.1262984900004085
},
"TrainerController.start_learning": {
"total": 14244.663560370002,
"count": 1,
"self": 6.910008685455978,
"children": {
"TrainerController._reset_env": {
"total": 6.514746067001397,
"count": 13,
"self": 6.514746067001397
},
"TrainerController.advance": {
"total": 14230.92983096054,
"count": 155217,
"self": 6.774979415218695,
"children": {
"env_step": {
"total": 5727.471202000965,
"count": 155217,
"self": 4592.1828661981,
"children": {
"SubprocessEnvManager._take_step": {
"total": 1130.9616338698124,
"count": 155217,
"self": 41.20201545738928,
"children": {
"TorchPolicy.evaluate": {
"total": 1089.7596184124232,
"count": 308522,
"self": 1089.7596184124232
}
}
},
"workers": {
"total": 4.326701933052391,
"count": 155217,
"self": 0.0,
"children": {
"worker_root": {
"total": 14214.414362956855,
"count": 155217,
"is_parallel": true,
"self": 10462.310586675896,
"children": {
"run_training.setup": {
"total": 0.0,
"count": 0,
"is_parallel": true,
"self": 0.0,
"children": {
"steps_from_proto": {
"total": 0.0053524930008279625,
"count": 2,
"is_parallel": true,
"self": 0.0012595870011864463,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.004092905999641516,
"count": 8,
"is_parallel": true,
"self": 0.004092905999641516
}
}
},
"UnityEnvironment.step": {
"total": 0.048933748999843374,
"count": 1,
"is_parallel": true,
"self": 0.0017748409991327208,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 0.000988138999673538,
"count": 1,
"is_parallel": true,
"self": 0.000988138999673538
},
"communicator.exchange": {
"total": 0.04035962499983725,
"count": 1,
"is_parallel": true,
"self": 0.04035962499983725
},
"steps_from_proto": {
"total": 0.005811144001199864,
"count": 2,
"is_parallel": true,
"self": 0.0012555690027511446,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.004555574998448719,
"count": 8,
"is_parallel": true,
"self": 0.004555574998448719
}
}
}
}
}
}
},
"steps_from_proto": {
"total": 0.03152863800096384,
"count": 24,
"is_parallel": true,
"self": 0.0066778510190488305,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.024850786981915007,
"count": 96,
"is_parallel": true,
"self": 0.024850786981915007
}
}
},
"UnityEnvironment.step": {
"total": 3752.0722476429582,
"count": 155216,
"is_parallel": true,
"self": 226.350097584198,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 150.0987853468814,
"count": 155216,
"is_parallel": true,
"self": 150.0987853468814
},
"communicator.exchange": {
"total": 2687.3918722135077,
"count": 155216,
"is_parallel": true,
"self": 2687.3918722135077
},
"steps_from_proto": {
"total": 688.2314924983712,
"count": 310432,
"is_parallel": true,
"self": 119.08135045259951,
"children": {
"_process_rank_one_or_two_observation": {
"total": 569.1501420457716,
"count": 1241728,
"is_parallel": true,
"self": 569.1501420457716
}
}
}
}
}
}
}
}
}
}
},
"trainer_advance": {
"total": 8496.683649544357,
"count": 155217,
"self": 64.8241411122799,
"children": {
"process_trajectory": {
"total": 1050.027008031091,
"count": 155217,
"self": 1048.5338505580949,
"children": {
"RLTrainer._checkpoint": {
"total": 1.493157472996245,
"count": 5,
"self": 1.493157472996245
}
}
},
"_update_policy": {
"total": 7381.832500400986,
"count": 108,
"self": 521.9311950030387,
"children": {
"TorchPOCAOptimizer.update": {
"total": 6859.901305397947,
"count": 3273,
"self": 6859.901305397947
}
}
}
}
}
}
},
"trainer_threads": {
"total": 1.2490017979871482e-06,
"count": 1,
"self": 1.2490017979871482e-06
},
"TrainerController._save_models": {
"total": 0.30897340800220263,
"count": 1,
"self": 0.006360611001582583,
"children": {
"RLTrainer._checkpoint": {
"total": 0.30261279700062005,
"count": 1,
"self": 0.30261279700062005
}
}
}
}
}
}
}