{ "name": "root", "gauges": { "SoccerTwos.Policy.Entropy.mean": { "value": 1.8949064016342163, "min": 1.8603436946868896, "max": 3.2957499027252197, "count": 581 }, "SoccerTwos.Policy.Entropy.sum": { "value": 37109.84765625, "min": 21056.34375, "max": 162810.890625, "count": 581 }, "SoccerTwos.Environment.EpisodeLength.mean": { "value": 52.08695652173913, "min": 39.354330708661415, "max": 999.0, "count": 581 }, "SoccerTwos.Environment.EpisodeLength.sum": { "value": 19168.0, "min": 13408.0, "max": 28940.0, "count": 581 }, "SoccerTwos.Self-play.ELO.mean": { "value": 1535.8170783377445, "min": 1200.801179182846, "max": 1557.0686785893156, "count": 538 }, "SoccerTwos.Self-play.ELO.sum": { "value": 282590.342414145, "min": 2401.602358365692, "max": 383097.3003217104, "count": 538 }, "SoccerTwos.Step.mean": { "value": 5809952.0, "min": 9862.0, "max": 5809952.0, "count": 581 }, "SoccerTwos.Step.sum": { "value": 5809952.0, "min": 9862.0, "max": 5809952.0, "count": 581 }, "SoccerTwos.Policy.ExtrinsicBaselineEstimate.mean": { "value": -0.02647317945957184, "min": -0.08949323743581772, "max": 0.153045192360878, "count": 581 }, "SoccerTwos.Policy.ExtrinsicBaselineEstimate.sum": { "value": -4.897538185119629, "min": -19.429080963134766, "max": 23.078475952148438, "count": 581 }, "SoccerTwos.Policy.ExtrinsicValueEstimate.mean": { "value": -0.028360364958643913, "min": -0.08809883892536163, "max": 0.15334834158420563, "count": 581 }, "SoccerTwos.Policy.ExtrinsicValueEstimate.sum": { "value": -5.246667385101318, "min": -19.646041870117188, "max": 24.6627197265625, "count": 581 }, "SoccerTwos.Environment.CumulativeReward.mean": { "value": 0.0, "min": 0.0, "max": 0.0, "count": 581 }, "SoccerTwos.Environment.CumulativeReward.sum": { "value": 0.0, "min": 0.0, "max": 0.0, "count": 581 }, "SoccerTwos.Policy.ExtrinsicReward.mean": { "value": 0.091999998608151, "min": -0.5714285714285714, "max": 0.4987941180520198, "count": 581 }, "SoccerTwos.Policy.ExtrinsicReward.sum": { "value": 17.019999742507935, "min": -44.81699997186661, "max": 56.24839973449707, "count": 581 }, "SoccerTwos.Environment.GroupCumulativeReward.mean": { "value": 0.091999998608151, "min": -0.5714285714285714, "max": 0.4987941180520198, "count": 581 }, "SoccerTwos.Environment.GroupCumulativeReward.sum": { "value": 17.019999742507935, "min": -44.81699997186661, "max": 56.24839973449707, "count": 581 }, "SoccerTwos.IsTraining.mean": { "value": 1.0, "min": 1.0, "max": 1.0, "count": 581 }, "SoccerTwos.IsTraining.sum": { "value": 1.0, "min": 1.0, "max": 1.0, "count": 581 }, "SoccerTwos.Losses.PolicyLoss.mean": { "value": 0.016094335338372425, "min": 0.011790066198833908, "max": 0.02423835392886152, "count": 277 }, "SoccerTwos.Losses.PolicyLoss.sum": { "value": 0.016094335338372425, "min": 0.011790066198833908, "max": 0.02423835392886152, "count": 277 }, "SoccerTwos.Losses.ValueLoss.mean": { "value": 0.10824133232235908, "min": 2.7801446890407534e-06, "max": 0.12347860013445218, "count": 277 }, "SoccerTwos.Losses.ValueLoss.sum": { "value": 0.10824133232235908, "min": 2.7801446890407534e-06, "max": 0.12347860013445218, "count": 277 }, "SoccerTwos.Losses.BaselineLoss.mean": { "value": 0.11006183400750161, "min": 2.790988332890265e-06, "max": 0.1263651671508948, "count": 277 }, "SoccerTwos.Losses.BaselineLoss.sum": { "value": 0.11006183400750161, "min": 2.790988332890265e-06, "max": 0.1263651671508948, "count": 277 }, "SoccerTwos.Policy.LearningRate.mean": { "value": 0.0003, "min": 0.0003, "max": 0.0003, "count": 277 }, "SoccerTwos.Policy.LearningRate.sum": { "value": 0.0003, "min": 0.0003, "max": 0.0003, "count": 277 }, "SoccerTwos.Policy.Epsilon.mean": { "value": 0.20000000000000007, "min": 0.2, "max": 0.20000000000000007, "count": 277 }, "SoccerTwos.Policy.Epsilon.sum": { "value": 0.20000000000000007, "min": 0.2, "max": 0.20000000000000007, "count": 277 }, "SoccerTwos.Policy.Beta.mean": { "value": 0.005000000000000001, "min": 0.005, "max": 0.005000000000000001, "count": 277 }, "SoccerTwos.Policy.Beta.sum": { "value": 0.005000000000000001, "min": 0.005, "max": 0.005000000000000001, "count": 277 } }, "metadata": { "timer_format_version": "0.1.0", "start_time_seconds": "1739636222", "python_version": "3.10.12 | packaged by Anaconda, Inc. | (main, Jul 5 2023, 19:01:18) [MSC v.1916 64 bit (AMD64)]", "command_line_arguments": "\\\\?\\D:\\Programs\\anaconda\\envs\\rl\\Scripts\\mlagents-learn ./config/poca/SoccerTwos.yaml --env=./training-envs-executables/SoccerTwos.exe --run-id=SoccerTwos --no-graphics --resume", "mlagents_version": "1.2.0.dev0", "mlagents_envs_version": "1.2.0.dev0", "communication_protocol_version": "1.5.0", "pytorch_version": "2.6.0+cpu", "numpy_version": "1.23.5", "end_time_seconds": "1739653928" }, "total": 17706.134931, "count": 1, "self": 0.9142553999954544, "children": { "run_training.setup": { "total": 0.098279800000455, "count": 1, "self": 0.098279800000455 }, "TrainerController.start_learning": { "total": 17705.122395800005, "count": 1, "self": 11.146700001576392, "children": { "TrainerController._reset_env": { "total": 5.29952559999947, "count": 30, "self": 5.29952559999947 }, "TrainerController.advance": { "total": 17688.50086899843, "count": 396233, "self": 11.0998429002575, "children": { "env_step": { "total": 7473.273780997308, "count": 396233, "self": 5051.324605999995, "children": { "SubprocessEnvManager._take_step": { "total": 2415.3672114005385, "count": 396233, "self": 68.63680060125989, "children": { "TorchPolicy.evaluate": { "total": 2346.7304107992786, "count": 737218, "self": 2346.7304107992786 } } }, "workers": { "total": 6.581963596774585, "count": 396233, "self": 0.0, "children": { "worker_root": { "total": 17663.38646570101, "count": 396233, "is_parallel": true, "self": 13944.472586501179, "children": { "steps_from_proto": { "total": 0.04976100000931183, "count": 60, "is_parallel": true, "self": 0.011394700006349012, "children": { "_process_rank_one_or_two_observation": { "total": 0.038366300002962817, "count": 240, "is_parallel": true, "self": 0.038366300002962817 } } }, "UnityEnvironment.step": { "total": 3718.864118199821, "count": 396233, "is_parallel": true, "self": 189.6772749978336, "children": { "UnityEnvironment._generate_step_input": { "total": 209.4122486008455, "count": 396233, "is_parallel": true, "self": 209.4122486008455 }, "communicator.exchange": { "total": 2666.0434153017413, "count": 396233, "is_parallel": true, "self": 2666.0434153017413 }, "steps_from_proto": { "total": 653.7311792994005, "count": 792466, "is_parallel": true, "self": 151.39783289942352, "children": { "_process_rank_one_or_two_observation": { "total": 502.333346399977, "count": 3169864, "is_parallel": true, "self": 502.333346399977 } } } } } } } } } } }, "trainer_advance": { "total": 10204.127245100866, "count": 396233, "self": 73.84202790062409, "children": { "process_trajectory": { "total": 2159.2702711002676, "count": 396233, "self": 2157.619380600274, "children": { "RLTrainer._checkpoint": { "total": 1.650890499993693, "count": 11, "self": 1.650890499993693 } } }, "_update_policy": { "total": 7971.014946099975, "count": 278, "self": 946.2963003000332, "children": { "TorchPOCAOptimizer.update": { "total": 7024.718645799941, "count": 8338, "self": 7024.718645799941 } } } } } } }, "trainer_threads": { "total": 2.0000006770715117e-06, "count": 1, "self": 2.0000006770715117e-06 }, "TrainerController._save_models": { "total": 0.1752991999965161, "count": 1, "self": 0.015333199997257907, "children": { "RLTrainer._checkpoint": { "total": 0.1599659999992582, "count": 1, "self": 0.1599659999992582 } } } } } } }