{
"name": "root",
"gauges": {
"SoccerTwos.Policy.Entropy.mean": {
"value": 1.3372392654418945,
"min": 1.2911572456359863,
"max": 1.490708351135254,
"count": 1000
},
"SoccerTwos.Policy.Entropy.sum": {
"value": 27686.201171875,
"min": 23950.677734375,
"max": 33094.0390625,
"count": 1000
},
"SoccerTwos.Environment.EpisodeLength.mean": {
"value": 51.322916666666664,
"min": 45.271028037383175,
"max": 91.20370370370371,
"count": 1000
},
"SoccerTwos.Environment.EpisodeLength.sum": {
"value": 19708.0,
"min": 17624.0,
"max": 21940.0,
"count": 1000
},
"SoccerTwos.Self-play.ELO.mean": {
"value": 1886.4125007328173,
"min": 1845.5113490249428,
"max": 1920.2038887511121,
"count": 1000
},
"SoccerTwos.Self-play.ELO.sum": {
"value": 362191.2001407009,
"min": 201643.19282499969,
"max": 407254.03114884254,
"count": 1000
},
"SoccerTwos.Step.mean": {
"value": 49999928.0,
"min": 40009986.0,
"max": 49999928.0,
"count": 1000
},
"SoccerTwos.Step.sum": {
"value": 49999928.0,
"min": 40009986.0,
"max": 49999928.0,
"count": 1000
},
"SoccerTwos.Policy.ExtrinsicBaselineEstimate.mean": {
"value": -0.032523591071367264,
"min": -0.12096764147281647,
"max": 0.0896327942609787,
"count": 1000
},
"SoccerTwos.Policy.ExtrinsicBaselineEstimate.sum": {
"value": -6.244529724121094,
"min": -20.443531036376953,
"max": 12.646177291870117,
"count": 1000
},
"SoccerTwos.Policy.ExtrinsicValueEstimate.mean": {
"value": -0.034205373376607895,
"min": -0.11887526512145996,
"max": 0.08995815366506577,
"count": 1000
},
"SoccerTwos.Policy.ExtrinsicValueEstimate.sum": {
"value": -6.567431449890137,
"min": -20.089920043945312,
"max": 12.70363712310791,
"count": 1000
},
"SoccerTwos.Environment.CumulativeReward.mean": {
"value": 0.0,
"min": 0.0,
"max": 0.0,
"count": 1000
},
"SoccerTwos.Environment.CumulativeReward.sum": {
"value": 0.0,
"min": 0.0,
"max": 0.0,
"count": 1000
},
"SoccerTwos.Policy.ExtrinsicReward.mean": {
"value": -0.22742916705707708,
"min": -0.44551594015480817,
"max": 0.30332198506551433,
"count": 1000
},
"SoccerTwos.Policy.ExtrinsicReward.sum": {
"value": -43.6664000749588,
"min": -61.481199741363525,
"max": 49.555999517440796,
"count": 1000
},
"SoccerTwos.Environment.GroupCumulativeReward.mean": {
"value": -0.22742916705707708,
"min": -0.44551594015480817,
"max": 0.30332198506551433,
"count": 1000
},
"SoccerTwos.Environment.GroupCumulativeReward.sum": {
"value": -43.6664000749588,
"min": -61.481199741363525,
"max": 49.555999517440796,
"count": 1000
},
"SoccerTwos.IsTraining.mean": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 1000
},
"SoccerTwos.IsTraining.sum": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 1000
},
"SoccerTwos.Losses.PolicyLoss.mean": {
"value": 0.0131971450978502,
"min": 0.01090679973300818,
"max": 0.025141271490914125,
"count": 485
},
"SoccerTwos.Losses.PolicyLoss.sum": {
"value": 0.0131971450978502,
"min": 0.01090679973300818,
"max": 0.025141271490914125,
"count": 485
},
"SoccerTwos.Losses.ValueLoss.mean": {
"value": 0.10739270200332006,
"min": 0.08449221576253572,
"max": 0.12066337143381436,
"count": 485
},
"SoccerTwos.Losses.ValueLoss.sum": {
"value": 0.10739270200332006,
"min": 0.08449221576253572,
"max": 0.12066337143381436,
"count": 485
},
"SoccerTwos.Losses.BaselineLoss.mean": {
"value": 0.10865302781263987,
"min": 0.08513711268703143,
"max": 0.12150260110696157,
"count": 485
},
"SoccerTwos.Losses.BaselineLoss.sum": {
"value": 0.10865302781263987,
"min": 0.08513711268703143,
"max": 0.12150260110696157,
"count": 485
},
"SoccerTwos.Policy.LearningRate.mean": {
"value": 0.0003,
"min": 0.0003,
"max": 0.0003,
"count": 485
},
"SoccerTwos.Policy.LearningRate.sum": {
"value": 0.0003,
"min": 0.0003,
"max": 0.0003,
"count": 485
},
"SoccerTwos.Policy.Epsilon.mean": {
"value": 0.20000000000000007,
"min": 0.20000000000000007,
"max": 0.20000000000000007,
"count": 485
},
"SoccerTwos.Policy.Epsilon.sum": {
"value": 0.20000000000000007,
"min": 0.20000000000000007,
"max": 0.20000000000000007,
"count": 485
},
"SoccerTwos.Policy.Beta.mean": {
"value": 0.005000000000000001,
"min": 0.005000000000000001,
"max": 0.005000000000000001,
"count": 485
},
"SoccerTwos.Policy.Beta.sum": {
"value": 0.005000000000000001,
"min": 0.005000000000000001,
"max": 0.005000000000000001,
"count": 485
}
},
"metadata": {
"timer_format_version": "0.1.0",
"start_time_seconds": "1676059757",
"python_version": "3.9.16 (main, Jan 11 2023, 10:02:19) \n[Clang 14.0.6 ]",
"command_line_arguments": "/Users/sasha/opt/anaconda3/envs/rl/bin/mlagents-learn ./config/poca/SoccerTwos.yaml --env=./training-envs-executables/SoccerTwos/SoccerTwos.app --run-id=SoccerTwos --no-graphics --resume",
"mlagents_version": "0.31.0.dev0",
"mlagents_envs_version": "0.31.0.dev0",
"communication_protocol_version": "1.5.0",
"pytorch_version": "1.11.0",
"numpy_version": "1.21.2",
"end_time_seconds": "1676112002"
},
"total": 52246.098630630004,
"count": 1,
"self": 1.349196472998301,
"children": {
"run_training.setup": {
"total": 0.025977141999999898,
"count": 1,
"self": 0.025977141999999898
},
"TrainerController.start_learning": {
"total": 52244.723457015,
"count": 1,
"self": 16.77068882175081,
"children": {
"TrainerController._reset_env": {
"total": 2.9831025650062233,
"count": 41,
"self": 2.9831025650062233
},
"TrainerController.advance": {
"total": 52224.76532062625,
"count": 689167,
"self": 15.940755929579609,
"children": {
"env_step": {
"total": 10612.097910706343,
"count": 689167,
"self": 8575.578037359955,
"children": {
"SubprocessEnvManager._take_step": {
"total": 2026.4311550891694,
"count": 689167,
"self": 78.55218210937346,
"children": {
"TorchPolicy.evaluate": {
"total": 1947.878972979796,
"count": 1253986,
"self": 1947.878972979796
}
}
},
"workers": {
"total": 10.088718257218032,
"count": 689167,
"self": 0.0,
"children": {
"worker_root": {
"total": 52211.624944290714,
"count": 689167,
"is_parallel": true,
"self": 45291.95068353964,
"children": {
"steps_from_proto": {
"total": 0.0714736650060166,
"count": 82,
"is_parallel": true,
"self": 0.015245358004622123,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.05622830700139447,
"count": 328,
"is_parallel": true,
"self": 0.05622830700139447
}
}
},
"UnityEnvironment.step": {
"total": 6919.602787086071,
"count": 689167,
"is_parallel": true,
"self": 426.8104536686669,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 281.81996137985857,
"count": 689167,
"is_parallel": true,
"self": 281.81996137985857
},
"communicator.exchange": {
"total": 5070.850199593674,
"count": 689167,
"is_parallel": true,
"self": 5070.850199593674
},
"steps_from_proto": {
"total": 1140.122172443871,
"count": 1378334,
"is_parallel": true,
"self": 249.89970291236955,
"children": {
"_process_rank_one_or_two_observation": {
"total": 890.2224695315015,
"count": 5513336,
"is_parallel": true,
"self": 890.2224695315015
}
}
}
}
}
}
}
}
}
}
},
"trainer_advance": {
"total": 41596.72665399033,
"count": 689167,
"self": 127.09956205873459,
"children": {
"process_trajectory": {
"total": 3453.842992483529,
"count": 689167,
"self": 3449.810125398529,
"children": {
"RLTrainer._checkpoint": {
"total": 4.032867084999907,
"count": 20,
"self": 4.032867084999907
}
}
},
"_update_policy": {
"total": 38015.784099448065,
"count": 485,
"self": 1551.5827546807632,
"children": {
"TorchPOCAOptimizer.update": {
"total": 36464.2013447673,
"count": 14550,
"self": 36464.2013447673
}
}
}
}
}
}
},
"trainer_threads": {
"total": 8.319984772242606e-07,
"count": 1,
"self": 8.319984772242606e-07
},
"TrainerController._save_models": {
"total": 0.20434417000069516,
"count": 1,
"self": 0.0017525270013720728,
"children": {
"RLTrainer._checkpoint": {
"total": 0.20259164299932308,
"count": 1,
"self": 0.20259164299932308
}
}
}
}
}
}
}