{ "name": "root", "gauges": { "SoccerTwos.Policy.Entropy.mean": { "value": 1.9366713762283325, "min": 1.921223759651184, "max": 2.7980246543884277, "count": 303 }, "SoccerTwos.Policy.Entropy.sum": { "value": 41150.39453125, "min": 37370.90625, "max": 70047.9765625, "count": 303 }, "SoccerTwos.Environment.EpisodeLength.mean": { "value": 50.63157894736842, "min": 44.56363636363636, "max": 113.30232558139535, "count": 303 }, "SoccerTwos.Environment.EpisodeLength.sum": { "value": 19240.0, "min": 12676.0, "max": 20980.0, "count": 303 }, "SoccerTwos.Self-play.ELO.mean": { "value": 1539.5732252586963, "min": 1314.574975317422, "max": 1546.7909758691726, "count": 303 }, "SoccerTwos.Self-play.ELO.sum": { "value": 292518.9127991523, "min": 105256.0186730712, "max": 330571.684406115, "count": 303 }, "SoccerTwos.Step.mean": { "value": 4999926.0, "min": 1979962.0, "max": 4999926.0, "count": 303 }, "SoccerTwos.Step.sum": { "value": 4999926.0, "min": 1979962.0, "max": 4999926.0, "count": 303 }, "SoccerTwos.Policy.ExtrinsicBaselineEstimate.mean": { "value": 0.014108068309724331, "min": -0.12224282324314117, "max": 0.1356886327266693, "count": 303 }, "SoccerTwos.Policy.ExtrinsicBaselineEstimate.sum": { "value": 2.680532932281494, "min": -22.737165451049805, "max": 22.769405364990234, "count": 303 }, "SoccerTwos.Policy.ExtrinsicValueEstimate.mean": { "value": 0.012007688172161579, "min": -0.12296264618635178, "max": 0.15052008628845215, "count": 303 }, "SoccerTwos.Policy.ExtrinsicValueEstimate.sum": { "value": 2.281460762023926, "min": -22.871051788330078, "max": 23.143291473388672, "count": 303 }, "SoccerTwos.Environment.CumulativeReward.mean": { "value": 0.0, "min": 0.0, "max": 0.0, "count": 303 }, "SoccerTwos.Environment.CumulativeReward.sum": { "value": 0.0, "min": 0.0, "max": 0.0, "count": 303 }, "SoccerTwos.Policy.ExtrinsicReward.mean": { "value": 0.02655789287466752, "min": -0.3654246926307678, "max": 0.33317027059761256, "count": 303 }, "SoccerTwos.Policy.ExtrinsicReward.sum": { "value": 5.045999646186829, "min": -61.52800005674362, "max": 49.309200048446655, "count": 303 }, "SoccerTwos.Environment.GroupCumulativeReward.mean": { "value": 0.02655789287466752, "min": -0.3654246926307678, "max": 0.33317027059761256, "count": 303 }, "SoccerTwos.Environment.GroupCumulativeReward.sum": { "value": 5.045999646186829, "min": -61.52800005674362, "max": 49.309200048446655, "count": 303 }, "SoccerTwos.IsTraining.mean": { "value": 1.0, "min": 1.0, "max": 1.0, "count": 303 }, "SoccerTwos.IsTraining.sum": { "value": 1.0, "min": 1.0, "max": 1.0, "count": 303 }, "SoccerTwos.Losses.PolicyLoss.mean": { "value": 0.01171204614705251, "min": 0.011686608046875335, "max": 0.02374286432750523, "count": 147 }, "SoccerTwos.Losses.PolicyLoss.sum": { "value": 0.01171204614705251, "min": 0.011686608046875335, "max": 0.02374286432750523, "count": 147 }, "SoccerTwos.Losses.ValueLoss.mean": { "value": 0.10385078514615695, "min": 0.06091271998981635, "max": 0.11298494984706244, "count": 147 }, "SoccerTwos.Losses.ValueLoss.sum": { "value": 0.10385078514615695, "min": 0.06091271998981635, "max": 0.11298494984706244, "count": 147 }, "SoccerTwos.Losses.BaselineLoss.mean": { "value": 0.10561498229702314, "min": 0.06363024786114693, "max": 0.11561396370331446, "count": 147 }, "SoccerTwos.Losses.BaselineLoss.sum": { "value": 0.10561498229702314, "min": 0.06363024786114693, "max": 0.11561396370331446, "count": 147 }, "SoccerTwos.Policy.LearningRate.mean": { "value": 0.0003, "min": 0.0003, "max": 0.0003, "count": 147 }, 
"SoccerTwos.Policy.LearningRate.sum": { "value": 0.0003, "min": 0.0003, "max": 0.0003, "count": 147 }, "SoccerTwos.Policy.Epsilon.mean": { "value": 0.20000000000000007, "min": 0.20000000000000007, "max": 0.20000000000000007, "count": 147 }, "SoccerTwos.Policy.Epsilon.sum": { "value": 0.20000000000000007, "min": 0.20000000000000007, "max": 0.20000000000000007, "count": 147 }, "SoccerTwos.Policy.Beta.mean": { "value": 0.005000000000000001, "min": 0.005000000000000001, "max": 0.005000000000000001, "count": 147 }, "SoccerTwos.Policy.Beta.sum": { "value": 0.005000000000000001, "min": 0.005000000000000001, "max": 0.005000000000000001, "count": 147 } }, "metadata": { "timer_format_version": "0.1.0", "start_time_seconds": "1739662040", "python_version": "3.10.12 (main, Jul 5 2023, 15:34:07) [Clang 14.0.6 ]", "command_line_arguments": "/Users/asafsmac/miniconda3/envs/rl/bin/mlagents-learn config/poca/SoccerTwos.yaml --env=./training-envs-executables/SoccerTwos/SoccerTwos.app --run-id=SoccerTwos --no-graphics --resume", "mlagents_version": "1.2.0.dev0", "mlagents_envs_version": "1.2.0.dev0", "communication_protocol_version": "1.5.0", "pytorch_version": "2.2.2", "numpy_version": "1.23.5", "end_time_seconds": "1739708635" }, "total": 46596.602719252, "count": 1, "self": 0.8277513469802216, "children": { "run_training.setup": { "total": 0.0526851590257138, "count": 1, "self": 0.0526851590257138 }, "TrainerController.start_learning": { "total": 46595.72228274599, "count": 1, "self": 12.153964645927772, "children": { "TrainerController._reset_env": { "total": 8.442461882834323, "count": 20, "self": 8.442461882834323 }, "TrainerController.advance": { "total": 46574.71806539234, "count": 209434, "self": 14.309848440927453, "children": { "env_step": { "total": 35972.25376434636, "count": 209434, "self": 34347.69145393954, "children": { "SubprocessEnvManager._take_step": { "total": 1617.3485647756606, "count": 209434, "self": 86.76540581020527, "children": { "TorchPolicy.evaluate": { "total": 1530.5831589654554, "count": 380058, "self": 1530.5831589654554 } } }, "workers": { "total": 7.213745631161146, "count": 209434, "self": 0.0, "children": { "worker_root": { "total": 46568.56154363707, "count": 209434, "is_parallel": true, "self": 13358.061990369693, "children": { "steps_from_proto": { "total": 0.11668114690110087, "count": 40, "is_parallel": true, "self": 0.020781399682164192, "children": { "_process_rank_one_or_two_observation": { "total": 0.09589974721893668, "count": 160, "is_parallel": true, "self": 0.09589974721893668 } } }, "UnityEnvironment.step": { "total": 33210.38287212048, "count": 209434, "is_parallel": true, "self": 155.9417895032093, "children": { "UnityEnvironment._generate_step_input": { "total": 672.769520398113, "count": 209434, "is_parallel": true, "self": 672.769520398113 }, "communicator.exchange": { "total": 31152.906478734687, "count": 209434, "is_parallel": true, "self": 31152.906478734687 }, "steps_from_proto": { "total": 1228.765083484468, "count": 418868, "is_parallel": true, "self": 219.4591452423483, "children": { "_process_rank_one_or_two_observation": { "total": 1009.3059382421197, "count": 1675472, "is_parallel": true, "self": 1009.3059382421197 } } } } } } } } } } }, "trainer_advance": { "total": 10588.154452605057, "count": 209434, "self": 78.7347104542423, "children": { "process_trajectory": { "total": 1864.6498735909117, "count": 209434, "self": 1861.6086807028623, "children": { "RLTrainer._checkpoint": { "total": 3.041192888049409, "count": 7, "self": 
3.041192888049409 } } }, "_update_policy": { "total": 8644.769868559903, "count": 147, "self": 719.2591106550535, "children": { "TorchPOCAOptimizer.update": { "total": 7925.510757904849, "count": 4410, "self": 7925.510757904849 } } } } } } }, "trainer_threads": { "total": 2.058921381831169e-06, "count": 1, "self": 2.058921381831169e-06 }, "TrainerController._save_models": { "total": 0.4077887659659609, "count": 1, "self": 0.0035792149137705564, "children": { "RLTrainer._checkpoint": { "total": 0.40420955105219036, "count": 1, "self": 0.40420955105219036 } } } } } } }
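
The JSON above follows the usual ML-Agents layout: a flat `gauges` map where each metric keeps its last `value` plus `min`/`max`/`count` over the run, a `metadata` block, and a nested timer tree whose nodes carry `total`/`count`/`self` seconds and a `children` map. The sketch below is not part of the original run; it only illustrates how such a file could be inspected. The file path is an assumption inferred from the `--run-id=SoccerTwos` argument in the metadata and may differ in your setup.

```python
# Minimal sketch for inspecting an ML-Agents run_logs/timers.json-style dump.
# The path below is an assumption; point it at wherever this JSON is stored.
import json

RUN_LOG = "results/SoccerTwos/run_logs/timers.json"  # assumed location

with open(RUN_LOG) as f:
    data = json.load(f)

# Each gauge stores the most recent value plus min/max and a sample count.
for name, gauge in data["gauges"].items():
    print(f"{name}: value={gauge['value']:.4f} "
          f"(min={gauge['min']:.4f}, max={gauge['max']:.4f}, n={gauge['count']})")

# The timer tree nests "children" blocks; walk it to see where wall-clock time went.
def print_timers(node, name="root", depth=0):
    total = node.get("total", 0.0)
    print(f"{'  ' * depth}{name}: {total:.1f}s over {node.get('count', 0)} call(s)")
    for child_name, child in node.get("children", {}).items():
        print_timers(child, child_name, depth + 1)

print_timers(data)
```

Run against this dump, the gauge loop would report, for example, the final `SoccerTwos.Self-play.ELO.mean` of roughly 1539.6, and the timer walk would show that most of the ~46,600 s of wall-clock time was spent inside `UnityEnvironment.step` (chiefly `communicator.exchange`).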