{ "name": "root", "gauges": { "SoccerTwos.Policy.Entropy.mean": { "value": 3.1873724460601807, "min": 3.1873724460601807, "max": 3.295748710632324, "count": 49 }, "SoccerTwos.Policy.Entropy.sum": { "value": 88124.4765625, "min": 15880.28515625, "max": 142804.84375, "count": 49 }, "SoccerTwos.Environment.EpisodeLength.mean": { "value": 571.3333333333334, "min": 367.2, "max": 999.0, "count": 49 }, "SoccerTwos.Environment.EpisodeLength.sum": { "value": 20568.0, "min": 13272.0, "max": 28732.0, "count": 49 }, "SoccerTwos.Self-play.ELO.mean": { "value": 1211.9874366109875, "min": 1195.9164489717098, "max": 1213.9733025908756, "count": 46 }, "SoccerTwos.Self-play.ELO.sum": { "value": 14543.84923933185, "min": 2397.935112475092, "max": 24203.632243773427, "count": 46 }, "SoccerTwos.Step.mean": { "value": 489874.0, "min": 9382.0, "max": 489874.0, "count": 49 }, "SoccerTwos.Step.sum": { "value": 489874.0, "min": 9382.0, "max": 489874.0, "count": 49 }, "SoccerTwos.Policy.ExtrinsicBaselineEstimate.mean": { "value": 0.02012542262673378, "min": -0.10600325465202332, "max": 0.02235868200659752, "count": 49 }, "SoccerTwos.Policy.ExtrinsicBaselineEstimate.sum": { "value": 0.36225759983062744, "min": -2.2258334159851074, "max": 0.3848363757133484, "count": 49 }, "SoccerTwos.Policy.ExtrinsicValueEstimate.mean": { "value": 0.018841039389371872, "min": -0.10604841262102127, "max": 0.024132657796144485, "count": 49 }, "SoccerTwos.Policy.ExtrinsicValueEstimate.sum": { "value": 0.3391387164592743, "min": -2.226529359817505, "max": 0.41025519371032715, "count": 49 }, "SoccerTwos.Environment.CumulativeReward.mean": { "value": 0.0, "min": 0.0, "max": 0.0, "count": 49 }, "SoccerTwos.Environment.CumulativeReward.sum": { "value": 0.0, "min": 0.0, "max": 0.0, "count": 49 }, "SoccerTwos.Policy.ExtrinsicReward.mean": { "value": -0.35982222027248806, "min": -0.45507692373715913, "max": 0.33965333302815753, "count": 49 }, "SoccerTwos.Policy.ExtrinsicReward.sum": { "value": -6.476799964904785, "min": -6.476799964904785, "max": 5.579200029373169, "count": 49 }, "SoccerTwos.Environment.GroupCumulativeReward.mean": { "value": -0.35982222027248806, "min": -0.45507692373715913, "max": 0.33965333302815753, "count": 49 }, "SoccerTwos.Environment.GroupCumulativeReward.sum": { "value": -6.476799964904785, "min": -6.476799964904785, "max": 5.579200029373169, "count": 49 }, "SoccerTwos.IsTraining.mean": { "value": 1.0, "min": 1.0, "max": 1.0, "count": 49 }, "SoccerTwos.IsTraining.sum": { "value": 1.0, "min": 1.0, "max": 1.0, "count": 49 }, "SoccerTwos.Losses.PolicyLoss.mean": { "value": 0.023655824836653967, "min": 0.011510751590443154, "max": 0.023655824836653967, "count": 22 }, "SoccerTwos.Losses.PolicyLoss.sum": { "value": 0.023655824836653967, "min": 0.011510751590443154, "max": 0.023655824836653967, "count": 22 }, "SoccerTwos.Losses.ValueLoss.mean": { "value": 0.0049479416649167735, "min": 0.0007939498983129549, "max": 0.008945920628805955, "count": 22 }, "SoccerTwos.Losses.ValueLoss.sum": { "value": 0.0049479416649167735, "min": 0.0007939498983129549, "max": 0.008945920628805955, "count": 22 }, "SoccerTwos.Losses.BaselineLoss.mean": { "value": 0.004987802860947947, "min": 0.0007949432319340606, "max": 0.009049016050994396, "count": 22 }, "SoccerTwos.Losses.BaselineLoss.sum": { "value": 0.004987802860947947, "min": 0.0007949432319340606, "max": 0.009049016050994396, "count": 22 }, "SoccerTwos.Policy.LearningRate.mean": { "value": 0.0003, "min": 0.0003, "max": 0.0003, "count": 22 }, "SoccerTwos.Policy.LearningRate.sum": { "value": 0.0003, "min": 0.0003, "max": 0.0003, "count": 22 }, "SoccerTwos.Policy.Epsilon.mean": { "value": 0.20000000000000007, "min": 0.20000000000000007, "max": 0.20000000000000007, "count": 22 }, "SoccerTwos.Policy.Epsilon.sum": { "value": 0.20000000000000007, "min": 0.20000000000000007, "max": 0.20000000000000007, "count": 22 }, "SoccerTwos.Policy.Beta.mean": { "value": 0.005000000000000001, "min": 0.005000000000000001, "max": 0.005000000000000001, "count": 22 }, "SoccerTwos.Policy.Beta.sum": { "value": 0.005000000000000001, "min": 0.005000000000000001, "max": 0.005000000000000001, "count": 22 } }, "metadata": { "timer_format_version": "0.1.0", "start_time_seconds": "1740138162", "python_version": "3.10.12 (main, Jul 5 2023, 15:02:25) [Clang 14.0.6 ]", "command_line_arguments": "/Users/yusinglam/miniconda3/envs/rl/bin/mlagents-learn ./config/poca/SoccerTwos.yaml --env=./training-envs-executables/SoccerTwos/SoccerTwos.app --run-id=SoccerTwos --no-graphics", "mlagents_version": "1.2.0.dev0", "mlagents_envs_version": "1.2.0.dev0", "communication_protocol_version": "1.5.0", "pytorch_version": "2.6.0", "numpy_version": "1.23.5", "end_time_seconds": "1740140231" }, "total": 2069.4561217910004, "count": 1, "self": 0.0032099150002977694, "children": { "run_training.setup": { "total": 0.024146292000295944, "count": 1, "self": 0.024146292000295944 }, "TrainerController.start_learning": { "total": 2069.428765584, "count": 1, "self": 0.42225192098067055, "children": { "TrainerController._reset_env": { "total": 5.158866748997752, "count": 3, "self": 5.158866748997752 }, "TrainerController.advance": { "total": 2063.0276540800214, "count": 32485, "self": 0.3791960199132518, "children": { "env_step": { "total": 1642.8784510290634, "count": 32485, "self": 1574.2491970521542, "children": { "SubprocessEnvManager._take_step": { "total": 68.36901075003425, "count": 32485, "self": 2.242853690202537, "children": { "TorchPolicy.evaluate": { "total": 66.12615705983171, "count": 64422, "self": 66.12615705983171 } } }, "workers": { "total": 0.2602432268749908, "count": 32484, "self": 0.0, "children": { "worker_root": { "total": 2063.567589636991, "count": 32484, "is_parallel": true, "self": 541.8382901358555, "children": { "steps_from_proto": { "total": 0.004870458999903349, "count": 6, "is_parallel": true, "self": 0.0006076679965190124, "children": { "_process_rank_one_or_two_observation": { "total": 0.004262791003384336, "count": 24, "is_parallel": true, "self": 0.004262791003384336 } } }, "UnityEnvironment.step": { "total": 1521.7244290421359, "count": 32484, "is_parallel": true, "self": 4.373664715294581, "children": { "UnityEnvironment._generate_step_input": { "total": 26.34676807699907, "count": 32484, "is_parallel": true, "self": 26.34676807699907 }, "communicator.exchange": { "total": 1435.516299409931, "count": 32484, "is_parallel": true, "self": 1435.516299409931 }, "steps_from_proto": { "total": 55.48769683991122, "count": 64968, "is_parallel": true, "self": 6.5330797572778465, "children": { "_process_rank_one_or_two_observation": { "total": 48.954617082633376, "count": 259872, "is_parallel": true, "self": 48.954617082633376 } } } } } } } } } } }, "trainer_advance": { "total": 419.77000703104477, "count": 32484, "self": 3.4646379810483268, "children": { "process_trajectory": { "total": 60.635451508994265, "count": 32484, "self": 60.635451508994265 }, "_update_policy": { "total": 355.6699175410022, "count": 23, "self": 33.79144190801071, "children": { "TorchPOCAOptimizer.update": { "total": 321.87847563299147, "count": 690, "self": 321.87847563299147 } } } } } } }, "trainer_threads": { "total": 1.1669999366858974e-06, "count": 1, "self": 1.1669999366858974e-06 }, "TrainerController._save_models": { "total": 0.8199916670000675, "count": 1, "self": 0.0009697920004327898, "children": { "RLTrainer._checkpoint": { "total": 0.8190218749996347, "count": 1, "self": 0.8190218749996347 } } } } } } }