{
"name": "root",
"gauges": {
"SoccerTwos.Policy.Entropy.mean": {
"value": 1.3372392654418945,
"min": 1.2911572456359863,
"max": 1.490708351135254,
"count": 1000
},
"SoccerTwos.Policy.Entropy.sum": {
"value": 27686.201171875,
"min": 23950.677734375,
"max": 33094.0390625,
"count": 1000
},
"SoccerTwos.Environment.EpisodeLength.mean": {
"value": 51.322916666666664,
"min": 45.271028037383175,
"max": 91.20370370370371,
"count": 1000
},
"SoccerTwos.Environment.EpisodeLength.sum": {
"value": 19708.0,
"min": 17624.0,
"max": 21940.0,
"count": 1000
},
"SoccerTwos.Self-play.ELO.mean": {
"value": 1886.4125007328173,
"min": 1845.5113490249428,
"max": 1920.2038887511121,
"count": 1000
},
"SoccerTwos.Self-play.ELO.sum": {
"value": 362191.2001407009,
"min": 201643.19282499969,
"max": 407254.03114884254,
"count": 1000
},
"SoccerTwos.Step.mean": {
"value": 49999928.0,
"min": 40009986.0,
"max": 49999928.0,
"count": 1000
},
"SoccerTwos.Step.sum": {
"value": 49999928.0,
"min": 40009986.0,
"max": 49999928.0,
"count": 1000
},
"SoccerTwos.Policy.ExtrinsicBaselineEstimate.mean": {
"value": -0.032523591071367264,
"min": -0.12096764147281647,
"max": 0.0896327942609787,
"count": 1000
},
"SoccerTwos.Policy.ExtrinsicBaselineEstimate.sum": {
"value": -6.244529724121094,
"min": -20.443531036376953,
"max": 12.646177291870117,
"count": 1000
},
"SoccerTwos.Policy.ExtrinsicValueEstimate.mean": {
"value": -0.034205373376607895,
"min": -0.11887526512145996,
"max": 0.08995815366506577,
"count": 1000
},
"SoccerTwos.Policy.ExtrinsicValueEstimate.sum": {
"value": -6.567431449890137,
"min": -20.089920043945312,
"max": 12.70363712310791,
"count": 1000
},
"SoccerTwos.Environment.CumulativeReward.mean": {
"value": 0.0,
"min": 0.0,
"max": 0.0,
"count": 1000
},
"SoccerTwos.Environment.CumulativeReward.sum": {
"value": 0.0,
"min": 0.0,
"max": 0.0,
"count": 1000
},
"SoccerTwos.Policy.ExtrinsicReward.mean": {
"value": -0.22742916705707708,
"min": -0.44551594015480817,
"max": 0.30332198506551433,
"count": 1000
},
"SoccerTwos.Policy.ExtrinsicReward.sum": {
"value": -43.6664000749588,
"min": -61.481199741363525,
"max": 49.555999517440796,
"count": 1000
},
"SoccerTwos.Environment.GroupCumulativeReward.mean": {
"value": -0.22742916705707708,
"min": -0.44551594015480817,
"max": 0.30332198506551433,
"count": 1000
},
"SoccerTwos.Environment.GroupCumulativeReward.sum": {
"value": -43.6664000749588,
"min": -61.481199741363525,
"max": 49.555999517440796,
"count": 1000
},
"SoccerTwos.IsTraining.mean": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 1000
},
"SoccerTwos.IsTraining.sum": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 1000
},
"SoccerTwos.Losses.PolicyLoss.mean": {
"value": 0.0131971450978502,
"min": 0.01090679973300818,
"max": 0.025141271490914125,
"count": 485
},
"SoccerTwos.Losses.PolicyLoss.sum": {
"value": 0.0131971450978502,
"min": 0.01090679973300818,
"max": 0.025141271490914125,
"count": 485
},
"SoccerTwos.Losses.ValueLoss.mean": {
"value": 0.10739270200332006,
"min": 0.08449221576253572,
"max": 0.12066337143381436,
"count": 485
},
"SoccerTwos.Losses.ValueLoss.sum": {
"value": 0.10739270200332006,
"min": 0.08449221576253572,
"max": 0.12066337143381436,
"count": 485
},
"SoccerTwos.Losses.BaselineLoss.mean": {
"value": 0.10865302781263987,
"min": 0.08513711268703143,
"max": 0.12150260110696157,
"count": 485
},
"SoccerTwos.Losses.BaselineLoss.sum": {
"value": 0.10865302781263987,
"min": 0.08513711268703143,
"max": 0.12150260110696157,
"count": 485
},
"SoccerTwos.Policy.LearningRate.mean": {
"value": 0.0003,
"min": 0.0003,
"max": 0.0003,
"count": 485
},
"SoccerTwos.Policy.LearningRate.sum": {
"value": 0.0003,
"min": 0.0003,
"max": 0.0003,
"count": 485
},
"SoccerTwos.Policy.Epsilon.mean": {
"value": 0.20000000000000007,
"min": 0.20000000000000007,
"max": 0.20000000000000007,
"count": 485
},
"SoccerTwos.Policy.Epsilon.sum": {
"value": 0.20000000000000007,
"min": 0.20000000000000007,
"max": 0.20000000000000007,
"count": 485
},
"SoccerTwos.Policy.Beta.mean": {
"value": 0.005000000000000001,
"min": 0.005000000000000001,
"max": 0.005000000000000001,
"count": 485
},
"SoccerTwos.Policy.Beta.sum": {
"value": 0.005000000000000001,
"min": 0.005000000000000001,
"max": 0.005000000000000001,
"count": 485
}
},
"metadata": {
"timer_format_version": "0.1.0",
"start_time_seconds": "1676059757",
"python_version": "3.9.16 (main, Jan 11 2023, 10:02:19) \n[Clang 14.0.6 ]",
"command_line_arguments": "/Users/sasha/opt/anaconda3/envs/rl/bin/mlagents-learn ./config/poca/SoccerTwos.yaml --env=./training-envs-executables/SoccerTwos/SoccerTwos.app --run-id=SoccerTwos --no-graphics --resume",
"mlagents_version": "0.31.0.dev0",
"mlagents_envs_version": "0.31.0.dev0",
"communication_protocol_version": "1.5.0",
"pytorch_version": "1.11.0",
"numpy_version": "1.21.2",
"end_time_seconds": "1676112002"
},
"total": 52246.098630630004,
"count": 1,
"self": 1.349196472998301,
"children": {
"run_training.setup": {
"total": 0.025977141999999898,
"count": 1,
"self": 0.025977141999999898
},
"TrainerController.start_learning": {
"total": 52244.723457015,
"count": 1,
"self": 16.77068882175081,
"children": {
"TrainerController._reset_env": {
"total": 2.9831025650062233,
"count": 41,
"self": 2.9831025650062233
},
"TrainerController.advance": {
"total": 52224.76532062625,
"count": 689167,
"self": 15.940755929579609,
"children": {
"env_step": {
"total": 10612.097910706343,
"count": 689167,
"self": 8575.578037359955,
"children": {
"SubprocessEnvManager._take_step": {
"total": 2026.4311550891694,
"count": 689167,
"self": 78.55218210937346,
"children": {
"TorchPolicy.evaluate": {
"total": 1947.878972979796,
"count": 1253986,
"self": 1947.878972979796
}
}
},
"workers": {
"total": 10.088718257218032,
"count": 689167,
"self": 0.0,
"children": {
"worker_root": {
"total": 52211.624944290714,
"count": 689167,
"is_parallel": true,
"self": 45291.95068353964,
"children": {
"steps_from_proto": {
"total": 0.0714736650060166,
"count": 82,
"is_parallel": true,
"self": 0.015245358004622123,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.05622830700139447,
"count": 328,
"is_parallel": true,
"self": 0.05622830700139447
}
}
},
"UnityEnvironment.step": {
"total": 6919.602787086071,
"count": 689167,
"is_parallel": true,
"self": 426.8104536686669,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 281.81996137985857,
"count": 689167,
"is_parallel": true,
"self": 281.81996137985857
},
"communicator.exchange": {
"total": 5070.850199593674,
"count": 689167,
"is_parallel": true,
"self": 5070.850199593674
},
"steps_from_proto": {
"total": 1140.122172443871,
"count": 1378334,
"is_parallel": true,
"self": 249.89970291236955,
"children": {
"_process_rank_one_or_two_observation": {
"total": 890.2224695315015,
"count": 5513336,
"is_parallel": true,
"self": 890.2224695315015
}
}
}
}
}
}
}
}
}
}
},
"trainer_advance": {
"total": 41596.72665399033,
"count": 689167,
"self": 127.09956205873459,
"children": {
"process_trajectory": {
"total": 3453.842992483529,
"count": 689167,
"self": 3449.810125398529,
"children": {
"RLTrainer._checkpoint": {
"total": 4.032867084999907,
"count": 20,
"self": 4.032867084999907
}
}
},
"_update_policy": {
"total": 38015.784099448065,
"count": 485,
"self": 1551.5827546807632,
"children": {
"TorchPOCAOptimizer.update": {
"total": 36464.2013447673,
"count": 14550,
"self": 36464.2013447673
}
}
}
}
}
}
},
"trainer_threads": {
"total": 8.319984772242606e-07,
"count": 1,
"self": 8.319984772242606e-07
},
"TrainerController._save_models": {
"total": 0.20434417000069516,
"count": 1,
"self": 0.0017525270013720728,
"children": {
"RLTrainer._checkpoint": {
"total": 0.20259164299932308,
"count": 1,
"self": 0.20259164299932308
}
}
}
}
}
}
}