Upload folder using huggingface_hub
Browse files
.summary/0/events.out.tfevents.1754371299.dcb799bd6d0b
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:89d51c6f1d23cfbee553b3b6f8a4c8737a35cdbe81b2f43932ab1206a5882ef3
|
3 |
+
size 807655
|
README.md
CHANGED
@@ -15,7 +15,7 @@ model-index:
|
|
15 |
type: doom_health_gathering_supreme
|
16 |
metrics:
|
17 |
- type: mean_reward
|
18 |
-
value:
|
19 |
name: mean_reward
|
20 |
verified: false
|
21 |
---
|
|
|
15 |
type: doom_health_gathering_supreme
|
16 |
metrics:
|
17 |
- type: mean_reward
|
18 |
+
value: 11.25 +/- 5.75
|
19 |
name: mean_reward
|
20 |
verified: false
|
21 |
---
|
checkpoint_p0/best_000000874_3579904_reward_22.461.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:219ba6dd52aca8244382fedbce2711152a0ae198e0cc524d6e6469588c82fb6c
|
3 |
+
size 34929708
|
checkpoint_p0/checkpoint_000000932_3817472.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5beda1e6fc163a6b107710ea7ed5e3bbe99842482e70329515825bb3d71709b8
|
3 |
+
size 34930146
|
checkpoint_p0/checkpoint_000000978_4005888.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:311c4f5a8d51ad18dcacee5311a9f7c57d1c89b1187c07d1c5581e9ffd94cbbb
|
3 |
+
size 34930146
|
config.json
CHANGED
@@ -65,7 +65,7 @@
|
|
65 |
"summaries_use_frameskip": true,
|
66 |
"heartbeat_interval": 20,
|
67 |
"heartbeat_reporting_interval": 600,
|
68 |
-
"train_for_env_steps":
|
69 |
"train_for_seconds": 10000000000,
|
70 |
"save_every_sec": 120,
|
71 |
"keep_checkpoints": 2,
|
|
|
65 |
"summaries_use_frameskip": true,
|
66 |
"heartbeat_interval": 20,
|
67 |
"heartbeat_reporting_interval": 600,
|
68 |
+
"train_for_env_steps": 4000000,
|
69 |
"train_for_seconds": 10000000000,
|
70 |
"save_every_sec": 120,
|
71 |
"keep_checkpoints": 2,
|
replay.mp4
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:16d109520545ade388c8f4f983329a21a1aa6874008836706d3d698a1f07634b
|
3 |
+
size 21688757
|
sf_log.txt
CHANGED
@@ -3931,3 +3931,942 @@ main_loop: 40.1994
|
|
3931 |
[2025-08-05 05:20:14,348][34008] Avg episode rewards: #0: 4.432, true rewards: #0: 4.032
|
3932 |
[2025-08-05 05:20:14,349][34008] Avg episode reward: 4.432, avg true_objective: 4.032
|
3933 |
[2025-08-05 05:20:34,959][34008] Replay video saved to /content/train_dir/default_experiment/replay.mp4!
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3931 |
[2025-08-05 05:20:14,348][34008] Avg episode rewards: #0: 4.432, true rewards: #0: 4.032
|
3932 |
[2025-08-05 05:20:14,349][34008] Avg episode reward: 4.432, avg true_objective: 4.032
|
3933 |
[2025-08-05 05:20:34,959][34008] Replay video saved to /content/train_dir/default_experiment/replay.mp4!
|
3934 |
+
[2025-08-05 05:20:40,363][34008] The model has been pushed to https://huggingface.co/naveen1divakar/rl_course_vizdoom_health_gathering_supreme
|
3935 |
+
[2025-08-05 05:21:43,601][34856] Saving configuration to /content/train_dir/default_experiment/config.json...
|
3936 |
+
[2025-08-05 05:21:43,602][34856] Rollout worker 0 uses device cpu
|
3937 |
+
[2025-08-05 05:21:43,603][34856] Rollout worker 1 uses device cpu
|
3938 |
+
[2025-08-05 05:21:43,603][34856] Rollout worker 2 uses device cpu
|
3939 |
+
[2025-08-05 05:21:43,603][34856] Rollout worker 3 uses device cpu
|
3940 |
+
[2025-08-05 05:21:43,603][34856] Rollout worker 4 uses device cpu
|
3941 |
+
[2025-08-05 05:21:43,603][34856] Rollout worker 5 uses device cpu
|
3942 |
+
[2025-08-05 05:21:43,603][34856] Rollout worker 6 uses device cpu
|
3943 |
+
[2025-08-05 05:21:43,604][34856] Rollout worker 7 uses device cpu
|
3944 |
+
[2025-08-05 05:21:43,694][34856] Using GPUs [0] for process 0 (actually maps to GPUs [0])
|
3945 |
+
[2025-08-05 05:21:43,694][34856] InferenceWorker_p0-w0: min num requests: 2
|
3946 |
+
[2025-08-05 05:21:43,723][34856] Starting all processes...
|
3947 |
+
[2025-08-05 05:21:43,723][34856] Starting process learner_proc0
|
3948 |
+
[2025-08-05 05:21:45,733][34856] Starting all processes...
|
3949 |
+
[2025-08-05 05:21:45,739][34856] Starting process inference_proc0-0
|
3950 |
+
[2025-08-05 05:21:45,741][34895] Using GPUs [0] for process 0 (actually maps to GPUs [0])
|
3951 |
+
[2025-08-05 05:21:45,741][34895] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for learning process 0
|
3952 |
+
[2025-08-05 05:21:45,740][34856] Starting process rollout_proc0
|
3953 |
+
[2025-08-05 05:21:45,740][34856] Starting process rollout_proc1
|
3954 |
+
[2025-08-05 05:21:45,740][34856] Starting process rollout_proc2
|
3955 |
+
[2025-08-05 05:21:45,740][34856] Starting process rollout_proc3
|
3956 |
+
[2025-08-05 05:21:45,740][34856] Starting process rollout_proc4
|
3957 |
+
[2025-08-05 05:21:45,761][34895] Num visible devices: 1
|
3958 |
+
[2025-08-05 05:21:45,763][34895] Starting seed is not provided
|
3959 |
+
[2025-08-05 05:21:45,764][34895] Using GPUs [0] for process 0 (actually maps to GPUs [0])
|
3960 |
+
[2025-08-05 05:21:45,764][34895] Initializing actor-critic model on device cuda:0
|
3961 |
+
[2025-08-05 05:21:45,765][34895] RunningMeanStd input shape: (3, 72, 128)
|
3962 |
+
[2025-08-05 05:21:45,766][34895] RunningMeanStd input shape: (1,)
|
3963 |
+
[2025-08-05 05:21:45,740][34856] Starting process rollout_proc5
|
3964 |
+
[2025-08-05 05:21:45,740][34856] Starting process rollout_proc6
|
3965 |
+
[2025-08-05 05:21:45,740][34856] Starting process rollout_proc7
|
3966 |
+
[2025-08-05 05:21:45,882][34895] ConvEncoder: input_channels=3
|
3967 |
+
[2025-08-05 05:21:47,538][34895] Conv encoder output size: 512
|
3968 |
+
[2025-08-05 05:21:47,551][34895] Policy head output size: 512
|
3969 |
+
[2025-08-05 05:21:47,789][34895] Created Actor Critic model with architecture:
|
3970 |
+
[2025-08-05 05:21:47,797][34895] ActorCriticSharedWeights(
|
3971 |
+
(obs_normalizer): ObservationNormalizer(
|
3972 |
+
(running_mean_std): RunningMeanStdDictInPlace(
|
3973 |
+
(running_mean_std): ModuleDict(
|
3974 |
+
(obs): RunningMeanStdInPlace()
|
3975 |
+
)
|
3976 |
+
)
|
3977 |
+
)
|
3978 |
+
(returns_normalizer): RecursiveScriptModule(original_name=RunningMeanStdInPlace)
|
3979 |
+
(encoder): VizdoomEncoder(
|
3980 |
+
(basic_encoder): ConvEncoder(
|
3981 |
+
(enc): RecursiveScriptModule(
|
3982 |
+
original_name=ConvEncoderImpl
|
3983 |
+
(conv_head): RecursiveScriptModule(
|
3984 |
+
original_name=Sequential
|
3985 |
+
(0): RecursiveScriptModule(original_name=Conv2d)
|
3986 |
+
(1): RecursiveScriptModule(original_name=ELU)
|
3987 |
+
(2): RecursiveScriptModule(original_name=Conv2d)
|
3988 |
+
(3): RecursiveScriptModule(original_name=ELU)
|
3989 |
+
(4): RecursiveScriptModule(original_name=Conv2d)
|
3990 |
+
(5): RecursiveScriptModule(original_name=ELU)
|
3991 |
+
)
|
3992 |
+
(mlp_layers): RecursiveScriptModule(
|
3993 |
+
original_name=Sequential
|
3994 |
+
(0): RecursiveScriptModule(original_name=Linear)
|
3995 |
+
(1): RecursiveScriptModule(original_name=ELU)
|
3996 |
+
)
|
3997 |
+
)
|
3998 |
+
)
|
3999 |
+
)
|
4000 |
+
(core): ModelCoreRNN(
|
4001 |
+
(core): GRU(512, 512)
|
4002 |
+
)
|
4003 |
+
(decoder): MlpDecoder(
|
4004 |
+
(mlp): Identity()
|
4005 |
+
)
|
4006 |
+
(critic_linear): Linear(in_features=512, out_features=1, bias=True)
|
4007 |
+
(action_parameterization): ActionParameterizationDefault(
|
4008 |
+
(distribution_linear): Linear(in_features=512, out_features=5, bias=True)
|
4009 |
+
)
|
4010 |
+
)
|
4011 |
+
[2025-08-05 05:21:48,928][34895] Using optimizer <class 'torch.optim.adam.Adam'>
|
4012 |
+
[2025-08-05 05:22:01,716][34895] Loading state from checkpoint /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000101_413696.pth...
|
4013 |
+
[2025-08-05 05:22:02,249][34895] Loading model from checkpoint
|
4014 |
+
[2025-08-05 05:22:02,269][34895] Loaded experiment state at self.train_step=101, self.env_steps=413696
|
4015 |
+
[2025-08-05 05:22:02,280][34895] Initialized policy 0 weights for model version 101
|
4016 |
+
[2025-08-05 05:22:02,282][34895] LearnerWorker_p0 finished initialization!
|
4017 |
+
[2025-08-05 05:22:02,283][34895] Using GPUs [0] for process 0 (actually maps to GPUs [0])
|
4018 |
+
[2025-08-05 05:22:04,931][34919] Worker 4 uses CPU cores [0]
|
4019 |
+
[2025-08-05 05:22:04,938][34920] Worker 0 uses CPU cores [0]
|
4020 |
+
[2025-08-05 05:22:04,942][34917] Using GPUs [0] for process 0 (actually maps to GPUs [0])
|
4021 |
+
[2025-08-05 05:22:04,955][34917] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for inference process 0
|
4022 |
+
[2025-08-05 05:22:05,015][34924] Worker 5 uses CPU cores [1]
|
4023 |
+
[2025-08-05 05:22:05,033][34917] Num visible devices: 1
|
4024 |
+
[2025-08-05 05:22:05,131][34922] Worker 3 uses CPU cores [1]
|
4025 |
+
[2025-08-05 05:22:05,152][34926] Worker 6 uses CPU cores [0]
|
4026 |
+
[2025-08-05 05:22:05,349][34921] Worker 2 uses CPU cores [0]
|
4027 |
+
[2025-08-05 05:22:05,409][34925] Worker 7 uses CPU cores [1]
|
4028 |
+
[2025-08-05 05:22:05,425][34917] RunningMeanStd input shape: (3, 72, 128)
|
4029 |
+
[2025-08-05 05:22:05,427][34917] RunningMeanStd input shape: (1,)
|
4030 |
+
[2025-08-05 05:22:05,453][34917] ConvEncoder: input_channels=3
|
4031 |
+
[2025-08-05 05:22:05,506][34856] Heartbeat connected on Batcher_0
|
4032 |
+
[2025-08-05 05:22:05,507][34856] Heartbeat connected on LearnerWorker_p0
|
4033 |
+
[2025-08-05 05:22:05,507][34856] Heartbeat connected on RolloutWorker_w4
|
4034 |
+
[2025-08-05 05:22:05,508][34856] Heartbeat connected on RolloutWorker_w0
|
4035 |
+
[2025-08-05 05:22:05,508][34856] Heartbeat connected on RolloutWorker_w5
|
4036 |
+
[2025-08-05 05:22:05,508][34856] Heartbeat connected on RolloutWorker_w3
|
4037 |
+
[2025-08-05 05:22:05,508][34856] Heartbeat connected on RolloutWorker_w6
|
4038 |
+
[2025-08-05 05:22:05,509][34856] Heartbeat connected on RolloutWorker_w2
|
4039 |
+
[2025-08-05 05:22:05,509][34856] Heartbeat connected on RolloutWorker_w7
|
4040 |
+
[2025-08-05 05:22:05,509][34856] Fps is (10 sec: nan, 60 sec: nan, 300 sec: nan). Total num frames: 413696. Throughput: 0: nan. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
|
4041 |
+
[2025-08-05 05:22:05,514][34856] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 413696. Throughput: 0: 0.0. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
|
4042 |
+
[2025-08-05 05:22:05,516][34856] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 413696. Throughput: 0: 0.0. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
|
4043 |
+
[2025-08-05 05:22:05,519][34856] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 413696. Throughput: 0: 0.0. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
|
4044 |
+
[2025-08-05 05:22:05,530][34923] Worker 1 uses CPU cores [1]
|
4045 |
+
[2025-08-05 05:22:05,536][34856] Heartbeat connected on RolloutWorker_w1
|
4046 |
+
[2025-08-05 05:22:05,615][34917] Conv encoder output size: 512
|
4047 |
+
[2025-08-05 05:22:05,615][34917] Policy head output size: 512
|
4048 |
+
[2025-08-05 05:22:05,656][34856] Inference worker 0-0 is ready!
|
4049 |
+
[2025-08-05 05:22:05,656][34856] All inference workers are ready! Signal rollout workers to start!
|
4050 |
+
[2025-08-05 05:22:05,657][34856] Heartbeat connected on InferenceWorker_p0-w0
|
4051 |
+
[2025-08-05 05:22:05,850][34920] Doom resolution: 160x120, resize resolution: (128, 72)
|
4052 |
+
[2025-08-05 05:22:05,849][34922] Doom resolution: 160x120, resize resolution: (128, 72)
|
4053 |
+
[2025-08-05 05:22:05,851][34919] Doom resolution: 160x120, resize resolution: (128, 72)
|
4054 |
+
[2025-08-05 05:22:05,851][34923] Doom resolution: 160x120, resize resolution: (128, 72)
|
4055 |
+
[2025-08-05 05:22:05,853][34925] Doom resolution: 160x120, resize resolution: (128, 72)
|
4056 |
+
[2025-08-05 05:22:05,854][34921] Doom resolution: 160x120, resize resolution: (128, 72)
|
4057 |
+
[2025-08-05 05:22:05,853][34926] Doom resolution: 160x120, resize resolution: (128, 72)
|
4058 |
+
[2025-08-05 05:22:05,855][34924] Doom resolution: 160x120, resize resolution: (128, 72)
|
4059 |
+
[2025-08-05 05:22:07,740][34926] Decorrelating experience for 0 frames...
|
4060 |
+
[2025-08-05 05:22:07,745][34919] Decorrelating experience for 0 frames...
|
4061 |
+
[2025-08-05 05:22:07,751][34921] Decorrelating experience for 0 frames...
|
4062 |
+
[2025-08-05 05:22:07,989][34922] Decorrelating experience for 0 frames...
|
4063 |
+
[2025-08-05 05:22:08,001][34923] Decorrelating experience for 0 frames...
|
4064 |
+
[2025-08-05 05:22:08,004][34924] Decorrelating experience for 0 frames...
|
4065 |
+
[2025-08-05 05:22:08,008][34925] Decorrelating experience for 0 frames...
|
4066 |
+
[2025-08-05 05:22:08,730][34919] Decorrelating experience for 32 frames...
|
4067 |
+
[2025-08-05 05:22:08,739][34921] Decorrelating experience for 32 frames...
|
4068 |
+
[2025-08-05 05:22:09,060][34856] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 413696. Throughput: 0: 0.0. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
|
4069 |
+
[2025-08-05 05:22:09,143][34924] Decorrelating experience for 32 frames...
|
4070 |
+
[2025-08-05 05:22:09,146][34922] Decorrelating experience for 32 frames...
|
4071 |
+
[2025-08-05 05:22:09,148][34925] Decorrelating experience for 32 frames...
|
4072 |
+
[2025-08-05 05:22:09,211][34920] Decorrelating experience for 0 frames...
|
4073 |
+
[2025-08-05 05:22:10,226][34921] Decorrelating experience for 64 frames...
|
4074 |
+
[2025-08-05 05:22:10,237][34919] Decorrelating experience for 64 frames...
|
4075 |
+
[2025-08-05 05:22:10,420][34920] Decorrelating experience for 32 frames...
|
4076 |
+
[2025-08-05 05:22:10,712][34923] Decorrelating experience for 32 frames...
|
4077 |
+
[2025-08-05 05:22:11,090][34924] Decorrelating experience for 64 frames...
|
4078 |
+
[2025-08-05 05:22:11,091][34922] Decorrelating experience for 64 frames...
|
4079 |
+
[2025-08-05 05:22:11,096][34925] Decorrelating experience for 64 frames...
|
4080 |
+
[2025-08-05 05:22:11,404][34921] Decorrelating experience for 96 frames...
|
4081 |
+
[2025-08-05 05:22:12,020][34920] Decorrelating experience for 64 frames...
|
4082 |
+
[2025-08-05 05:22:12,151][34926] Decorrelating experience for 32 frames...
|
4083 |
+
[2025-08-05 05:22:13,214][34923] Decorrelating experience for 64 frames...
|
4084 |
+
[2025-08-05 05:22:13,354][34924] Decorrelating experience for 96 frames...
|
4085 |
+
[2025-08-05 05:22:13,359][34925] Decorrelating experience for 96 frames...
|
4086 |
+
[2025-08-05 05:22:13,695][34919] Decorrelating experience for 96 frames...
|
4087 |
+
[2025-08-05 05:22:14,060][34856] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 413696. Throughput: 0: 1.2. Samples: 10. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
|
4088 |
+
[2025-08-05 05:22:14,063][34856] Avg episode reward: [(0, '1.280')]
|
4089 |
+
[2025-08-05 05:22:15,272][34922] Decorrelating experience for 96 frames...
|
4090 |
+
[2025-08-05 05:22:15,302][34920] Decorrelating experience for 96 frames...
|
4091 |
+
[2025-08-05 05:22:16,739][34926] Decorrelating experience for 64 frames...
|
4092 |
+
[2025-08-05 05:22:17,696][34923] Decorrelating experience for 96 frames...
|
4093 |
+
[2025-08-05 05:22:19,060][34856] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 413696. Throughput: 0: 132.4. Samples: 1794. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
|
4094 |
+
[2025-08-05 05:22:19,061][34856] Avg episode reward: [(0, '2.924')]
|
4095 |
+
[2025-08-05 05:22:19,330][34895] Signal inference workers to stop experience collection...
|
4096 |
+
[2025-08-05 05:22:19,352][34917] InferenceWorker_p0-w0: stopping experience collection
|
4097 |
+
[2025-08-05 05:22:19,581][34926] Decorrelating experience for 96 frames...
|
4098 |
+
[2025-08-05 05:22:20,291][34895] Signal inference workers to resume experience collection...
|
4099 |
+
[2025-08-05 05:22:20,292][34917] InferenceWorker_p0-w0: resuming experience collection
|
4100 |
+
[2025-08-05 05:22:24,060][34856] Fps is (10 sec: 2048.0, 60 sec: 1104.0, 300 sec: 1104.0). Total num frames: 434176. Throughput: 0: 323.4. Samples: 6000. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
|
4101 |
+
[2025-08-05 05:22:24,061][34856] Avg episode reward: [(0, '3.680')]
|
4102 |
+
[2025-08-05 05:22:28,749][34917] Updated weights for policy 0, policy_version 111 (0.0024)
|
4103 |
+
[2025-08-05 05:22:29,060][34856] Fps is (10 sec: 4096.0, 60 sec: 1739.2, 300 sec: 1739.2). Total num frames: 454656. Throughput: 0: 390.2. Samples: 9190. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0)
|
4104 |
+
[2025-08-05 05:22:29,061][34856] Avg episode reward: [(0, '3.968')]
|
4105 |
+
[2025-08-05 05:22:34,060][34856] Fps is (10 sec: 3686.4, 60 sec: 2008.5, 300 sec: 2008.5). Total num frames: 471040. Throughput: 0: 472.4. Samples: 13486. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
|
4106 |
+
[2025-08-05 05:22:34,061][34856] Avg episode reward: [(0, '4.534')]
|
4107 |
+
[2025-08-05 05:22:39,060][34856] Fps is (10 sec: 3276.8, 60 sec: 2197.5, 300 sec: 2197.5). Total num frames: 487424. Throughput: 0: 570.0. Samples: 19120. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
|
4108 |
+
[2025-08-05 05:22:39,062][34856] Avg episode reward: [(0, '4.453')]
|
4109 |
+
[2025-08-05 05:22:40,503][34917] Updated weights for policy 0, policy_version 121 (0.0051)
|
4110 |
+
[2025-08-05 05:22:44,060][34856] Fps is (10 sec: 3276.8, 60 sec: 2337.5, 300 sec: 2337.5). Total num frames: 503808. Throughput: 0: 578.8. Samples: 22308. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0)
|
4111 |
+
[2025-08-05 05:22:44,061][34856] Avg episode reward: [(0, '4.465')]
|
4112 |
+
[2025-08-05 05:22:49,060][34856] Fps is (10 sec: 3276.8, 60 sec: 2445.3, 300 sec: 2445.3). Total num frames: 520192. Throughput: 0: 612.4. Samples: 26666. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
|
4113 |
+
[2025-08-05 05:22:49,061][34856] Avg episode reward: [(0, '4.296')]
|
4114 |
+
[2025-08-05 05:22:51,997][34917] Updated weights for policy 0, policy_version 131 (0.0039)
|
4115 |
+
[2025-08-05 05:22:54,060][34856] Fps is (10 sec: 4096.0, 60 sec: 2700.0, 300 sec: 2699.7). Total num frames: 544768. Throughput: 0: 737.3. Samples: 33178. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
|
4116 |
+
[2025-08-05 05:22:54,061][34856] Avg episode reward: [(0, '4.197')]
|
4117 |
+
[2025-08-05 05:22:59,061][34856] Fps is (10 sec: 4095.7, 60 sec: 2753.9, 300 sec: 2753.5). Total num frames: 561152. Throughput: 0: 809.2. Samples: 36424. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
|
4118 |
+
[2025-08-05 05:22:59,062][34856] Avg episode reward: [(0, '4.227')]
|
4119 |
+
[2025-08-05 05:23:03,480][34917] Updated weights for policy 0, policy_version 141 (0.0026)
|
4120 |
+
[2025-08-05 05:23:04,060][34856] Fps is (10 sec: 3276.8, 60 sec: 2798.7, 300 sec: 2798.3). Total num frames: 577536. Throughput: 0: 864.0. Samples: 40676. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
|
4121 |
+
[2025-08-05 05:23:04,061][34856] Avg episode reward: [(0, '4.336')]
|
4122 |
+
[2025-08-05 05:23:09,060][34856] Fps is (10 sec: 3686.7, 60 sec: 3072.0, 300 sec: 2900.4). Total num frames: 598016. Throughput: 0: 910.4. Samples: 46970. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
|
4123 |
+
[2025-08-05 05:23:09,061][34856] Avg episode reward: [(0, '4.408')]
|
4124 |
+
[2025-08-05 05:23:13,827][34917] Updated weights for policy 0, policy_version 151 (0.0024)
|
4125 |
+
[2025-08-05 05:23:14,060][34856] Fps is (10 sec: 4096.0, 60 sec: 3413.3, 300 sec: 2987.6). Total num frames: 618496. Throughput: 0: 911.2. Samples: 50192. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0)
|
4126 |
+
[2025-08-05 05:23:14,061][34856] Avg episode reward: [(0, '4.647')]
|
4127 |
+
[2025-08-05 05:23:19,064][34856] Fps is (10 sec: 3684.9, 60 sec: 3686.1, 300 sec: 3007.1). Total num frames: 634880. Throughput: 0: 913.2. Samples: 54586. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
|
4128 |
+
[2025-08-05 05:23:19,065][34856] Avg episode reward: [(0, '4.528')]
|
4129 |
+
[2025-08-05 05:23:24,060][34856] Fps is (10 sec: 3686.4, 60 sec: 3686.4, 300 sec: 3076.5). Total num frames: 655360. Throughput: 0: 931.0. Samples: 61014. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
|
4130 |
+
[2025-08-05 05:23:24,062][34856] Avg episode reward: [(0, '4.372')]
|
4131 |
+
[2025-08-05 05:23:24,624][34917] Updated weights for policy 0, policy_version 161 (0.0022)
|
4132 |
+
[2025-08-05 05:23:29,064][34856] Fps is (10 sec: 4096.3, 60 sec: 3686.2, 300 sec: 3137.4). Total num frames: 675840. Throughput: 0: 931.6. Samples: 64234. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0)
|
4133 |
+
[2025-08-05 05:23:29,064][34856] Avg episode reward: [(0, '4.652')]
|
4134 |
+
[2025-08-05 05:23:34,060][34856] Fps is (10 sec: 3686.4, 60 sec: 3686.4, 300 sec: 3145.4). Total num frames: 692224. Throughput: 0: 932.8. Samples: 68640. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0)
|
4135 |
+
[2025-08-05 05:23:34,061][34856] Avg episode reward: [(0, '4.770')]
|
4136 |
+
[2025-08-05 05:23:34,062][34895] Saving new best policy, reward=4.770!
|
4137 |
+
[2025-08-05 05:23:36,041][34917] Updated weights for policy 0, policy_version 171 (0.0017)
|
4138 |
+
[2025-08-05 05:23:39,060][34856] Fps is (10 sec: 3687.7, 60 sec: 3754.7, 300 sec: 3196.2). Total num frames: 712704. Throughput: 0: 928.2. Samples: 74946. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
|
4139 |
+
[2025-08-05 05:23:39,061][34856] Avg episode reward: [(0, '4.745')]
|
4140 |
+
[2025-08-05 05:23:39,068][34895] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000174_712704.pth...
|
4141 |
+
[2025-08-05 05:23:39,202][34895] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000099_405504.pth
|
4142 |
+
[2025-08-05 05:23:44,060][34856] Fps is (10 sec: 3686.4, 60 sec: 3754.7, 300 sec: 3200.3). Total num frames: 729088. Throughput: 0: 926.6. Samples: 78122. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
|
4143 |
+
[2025-08-05 05:23:44,063][34856] Avg episode reward: [(0, '4.787')]
|
4144 |
+
[2025-08-05 05:23:44,064][34895] Saving new best policy, reward=4.787!
|
4145 |
+
[2025-08-05 05:23:47,716][34917] Updated weights for policy 0, policy_version 181 (0.0032)
|
4146 |
+
[2025-08-05 05:23:49,060][34856] Fps is (10 sec: 3276.8, 60 sec: 3754.7, 300 sec: 3204.0). Total num frames: 745472. Throughput: 0: 927.4. Samples: 82408. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
|
4147 |
+
[2025-08-05 05:23:49,061][34856] Avg episode reward: [(0, '4.628')]
|
4148 |
+
[2025-08-05 05:23:54,060][34856] Fps is (10 sec: 3686.4, 60 sec: 3686.4, 300 sec: 3245.1). Total num frames: 765952. Throughput: 0: 930.6. Samples: 88846. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
|
4149 |
+
[2025-08-05 05:23:54,061][34856] Avg episode reward: [(0, '4.592')]
|
4150 |
+
[2025-08-05 05:23:57,439][34917] Updated weights for policy 0, policy_version 191 (0.0024)
|
4151 |
+
[2025-08-05 05:23:59,061][34856] Fps is (10 sec: 4095.7, 60 sec: 3754.7, 300 sec: 3282.5). Total num frames: 786432. Throughput: 0: 930.6. Samples: 92072. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0)
|
4152 |
+
[2025-08-05 05:23:59,065][34856] Avg episode reward: [(0, '4.798')]
|
4153 |
+
[2025-08-05 05:23:59,077][34895] Saving new best policy, reward=4.798!
|
4154 |
+
[2025-08-05 05:24:04,060][34856] Fps is (10 sec: 3686.4, 60 sec: 3754.7, 300 sec: 3282.3). Total num frames: 802816. Throughput: 0: 927.7. Samples: 96330. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
|
4155 |
+
[2025-08-05 05:24:04,061][34856] Avg episode reward: [(0, '4.694')]
|
4156 |
+
[2025-08-05 05:24:08,894][34917] Updated weights for policy 0, policy_version 201 (0.0030)
|
4157 |
+
[2025-08-05 05:24:09,060][34856] Fps is (10 sec: 3686.7, 60 sec: 3754.7, 300 sec: 3315.2). Total num frames: 823296. Throughput: 0: 927.4. Samples: 102748. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
|
4158 |
+
[2025-08-05 05:24:09,061][34856] Avg episode reward: [(0, '4.739')]
|
4159 |
+
[2025-08-05 05:24:14,060][34856] Fps is (10 sec: 3686.3, 60 sec: 3686.4, 300 sec: 3313.7). Total num frames: 839680. Throughput: 0: 926.4. Samples: 105918. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
|
4160 |
+
[2025-08-05 05:24:14,061][34856] Avg episode reward: [(0, '4.873')]
|
4161 |
+
[2025-08-05 05:24:14,062][34895] Saving new best policy, reward=4.873!
|
4162 |
+
[2025-08-05 05:24:19,060][34856] Fps is (10 sec: 3276.8, 60 sec: 3686.7, 300 sec: 3312.4). Total num frames: 856064. Throughput: 0: 925.7. Samples: 110296. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0)
|
4163 |
+
[2025-08-05 05:24:19,061][34856] Avg episode reward: [(0, '4.902')]
|
4164 |
+
[2025-08-05 05:24:19,072][34895] Saving new best policy, reward=4.902!
|
4165 |
+
[2025-08-05 05:24:20,554][34917] Updated weights for policy 0, policy_version 211 (0.0028)
|
4166 |
+
[2025-08-05 05:24:24,060][34856] Fps is (10 sec: 3686.5, 60 sec: 3686.4, 300 sec: 3340.6). Total num frames: 876544. Throughput: 0: 928.4. Samples: 116726. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
|
4167 |
+
[2025-08-05 05:24:24,061][34856] Avg episode reward: [(0, '4.910')]
|
4168 |
+
[2025-08-05 05:24:24,062][34895] Saving new best policy, reward=4.910!
|
4169 |
+
[2025-08-05 05:24:29,061][34856] Fps is (10 sec: 4095.7, 60 sec: 3686.6, 300 sec: 3366.9). Total num frames: 897024. Throughput: 0: 927.1. Samples: 119844. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
|
4170 |
+
[2025-08-05 05:24:29,062][34856] Avg episode reward: [(0, '4.864')]
|
4171 |
+
[2025-08-05 05:24:31,811][34917] Updated weights for policy 0, policy_version 221 (0.0041)
|
4172 |
+
[2025-08-05 05:24:34,060][34856] Fps is (10 sec: 3686.4, 60 sec: 3686.4, 300 sec: 3363.9). Total num frames: 913408. Throughput: 0: 930.2. Samples: 124266. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0)
|
4173 |
+
[2025-08-05 05:24:34,061][34856] Avg episode reward: [(0, '4.795')]
|
4174 |
+
[2025-08-05 05:24:39,060][34856] Fps is (10 sec: 3686.7, 60 sec: 3686.4, 300 sec: 3387.8). Total num frames: 933888. Throughput: 0: 930.8. Samples: 130730. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
|
4175 |
+
[2025-08-05 05:24:39,061][34856] Avg episode reward: [(0, '5.241')]
|
4176 |
+
[2025-08-05 05:24:39,067][34895] Saving new best policy, reward=5.241!
|
4177 |
+
[2025-08-05 05:24:41,474][34917] Updated weights for policy 0, policy_version 231 (0.0026)
|
4178 |
+
[2025-08-05 05:24:44,064][34856] Fps is (10 sec: 3685.1, 60 sec: 3686.2, 300 sec: 3384.2). Total num frames: 950272. Throughput: 0: 924.2. Samples: 133662. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
|
4179 |
+
[2025-08-05 05:24:44,065][34856] Avg episode reward: [(0, '5.392')]
|
4180 |
+
[2025-08-05 05:24:44,066][34895] Saving new best policy, reward=5.392!
|
4181 |
+
[2025-08-05 05:24:49,065][34856] Fps is (10 sec: 2865.9, 60 sec: 3617.8, 300 sec: 3355.8). Total num frames: 962560. Throughput: 0: 902.3. Samples: 136938. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
|
4182 |
+
[2025-08-05 05:24:49,066][34856] Avg episode reward: [(0, '5.364')]
|
4183 |
+
[2025-08-05 05:24:54,060][34856] Fps is (10 sec: 3278.0, 60 sec: 3618.1, 300 sec: 3377.9). Total num frames: 983040. Throughput: 0: 889.8. Samples: 142790. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
|
4184 |
+
[2025-08-05 05:24:54,061][34856] Avg episode reward: [(0, '5.046')]
|
4185 |
+
[2025-08-05 05:24:54,794][34917] Updated weights for policy 0, policy_version 241 (0.0026)
|
4186 |
+
[2025-08-05 05:24:59,060][34856] Fps is (10 sec: 4097.9, 60 sec: 3618.2, 300 sec: 3398.6). Total num frames: 1003520. Throughput: 0: 890.2. Samples: 145978. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
|
4187 |
+
[2025-08-05 05:24:59,061][34856] Avg episode reward: [(0, '4.944')]
|
4188 |
+
[2025-08-05 05:25:04,060][34856] Fps is (10 sec: 3276.8, 60 sec: 3549.9, 300 sec: 3372.2). Total num frames: 1015808. Throughput: 0: 892.5. Samples: 150460. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0)
|
4189 |
+
[2025-08-05 05:25:04,061][34856] Avg episode reward: [(0, '5.341')]
|
4190 |
+
[2025-08-05 05:25:06,348][34917] Updated weights for policy 0, policy_version 251 (0.0037)
|
4191 |
+
[2025-08-05 05:25:09,060][34856] Fps is (10 sec: 3276.8, 60 sec: 3549.9, 300 sec: 3391.9). Total num frames: 1036288. Throughput: 0: 891.3. Samples: 156834. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
|
4192 |
+
[2025-08-05 05:25:09,061][34856] Avg episode reward: [(0, '5.470')]
|
4193 |
+
[2025-08-05 05:25:09,068][34895] Saving new best policy, reward=5.470!
|
4194 |
+
[2025-08-05 05:25:14,060][34856] Fps is (10 sec: 4095.9, 60 sec: 3618.1, 300 sec: 3410.6). Total num frames: 1056768. Throughput: 0: 889.8. Samples: 159884. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0)
|
4195 |
+
[2025-08-05 05:25:14,061][34856] Avg episode reward: [(0, '5.257')]
|
4196 |
+
[2025-08-05 05:25:17,831][34917] Updated weights for policy 0, policy_version 261 (0.0041)
|
4197 |
+
[2025-08-05 05:25:19,060][34856] Fps is (10 sec: 3686.4, 60 sec: 3618.1, 300 sec: 3407.1). Total num frames: 1073152. Throughput: 0: 890.6. Samples: 164344. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0)
|
4198 |
+
[2025-08-05 05:25:19,061][34856] Avg episode reward: [(0, '5.151')]
|
4199 |
+
[2025-08-05 05:25:24,060][34856] Fps is (10 sec: 3686.5, 60 sec: 3618.1, 300 sec: 3424.5). Total num frames: 1093632. Throughput: 0: 887.6. Samples: 170672. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
|
4200 |
+
[2025-08-05 05:25:24,061][34856] Avg episode reward: [(0, '5.340')]
|
4201 |
+
[2025-08-05 05:25:27,277][34917] Updated weights for policy 0, policy_version 271 (0.0044)
|
4202 |
+
[2025-08-05 05:25:29,060][34856] Fps is (10 sec: 4096.0, 60 sec: 3618.2, 300 sec: 3441.0). Total num frames: 1114112. Throughput: 0: 893.4. Samples: 173864. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
|
4203 |
+
[2025-08-05 05:25:29,061][34856] Avg episode reward: [(0, '5.474')]
|
4204 |
+
[2025-08-05 05:25:29,073][34895] Saving new best policy, reward=5.474!
|
4205 |
+
[2025-08-05 05:25:34,060][34856] Fps is (10 sec: 3276.8, 60 sec: 3549.9, 300 sec: 3417.4). Total num frames: 1126400. Throughput: 0: 916.5. Samples: 178174. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
|
4206 |
+
[2025-08-05 05:25:34,061][34856] Avg episode reward: [(0, '5.615')]
|
4207 |
+
[2025-08-05 05:25:34,062][34895] Saving new best policy, reward=5.615!
|
4208 |
+
[2025-08-05 05:25:39,060][34856] Fps is (10 sec: 3276.8, 60 sec: 3549.9, 300 sec: 3433.3). Total num frames: 1146880. Throughput: 0: 927.4. Samples: 184524. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0)
|
4209 |
+
[2025-08-05 05:25:39,061][34856] Avg episode reward: [(0, '5.151')]
|
4210 |
+
[2025-08-05 05:25:39,080][34917] Updated weights for policy 0, policy_version 281 (0.0025)
|
4211 |
+
[2025-08-05 05:25:39,082][34895] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000281_1150976.pth...
|
4212 |
+
[2025-08-05 05:25:39,211][34895] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000101_413696.pth
|
4213 |
+
[2025-08-05 05:25:44,061][34856] Fps is (10 sec: 4095.6, 60 sec: 3618.3, 300 sec: 3448.4). Total num frames: 1167360. Throughput: 0: 923.2. Samples: 187524. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
|
4214 |
+
[2025-08-05 05:25:44,062][34856] Avg episode reward: [(0, '5.222')]
|
4215 |
+
[2025-08-05 05:25:49,060][34856] Fps is (10 sec: 3686.4, 60 sec: 3686.7, 300 sec: 3444.6). Total num frames: 1183744. Throughput: 0: 920.0. Samples: 191862. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
|
4216 |
+
[2025-08-05 05:25:49,061][34856] Avg episode reward: [(0, '5.219')]
|
4217 |
+
[2025-08-05 05:25:50,835][34917] Updated weights for policy 0, policy_version 291 (0.0039)
|
4218 |
+
[2025-08-05 05:25:54,060][34856] Fps is (10 sec: 3686.8, 60 sec: 3686.4, 300 sec: 3458.9). Total num frames: 1204224. Throughput: 0: 921.6. Samples: 198304. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0)
|
4219 |
+
[2025-08-05 05:25:54,061][34856] Avg episode reward: [(0, '5.108')]
|
4220 |
+
[2025-08-05 05:25:59,065][34856] Fps is (10 sec: 4094.1, 60 sec: 3686.1, 300 sec: 3472.4). Total num frames: 1224704. Throughput: 0: 924.3. Samples: 201482. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
|
4221 |
+
[2025-08-05 05:25:59,066][34856] Avg episode reward: [(0, '5.643')]
|
4222 |
+
[2025-08-05 05:25:59,078][34895] Saving new best policy, reward=5.643!
|
4223 |
+
[2025-08-05 05:26:02,111][34917] Updated weights for policy 0, policy_version 301 (0.0033)
|
4224 |
+
[2025-08-05 05:26:04,060][34856] Fps is (10 sec: 3276.8, 60 sec: 3686.4, 300 sec: 3451.2). Total num frames: 1236992. Throughput: 0: 920.4. Samples: 205762. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
|
4225 |
+
[2025-08-05 05:26:04,061][34856] Avg episode reward: [(0, '5.463')]
|
4226 |
+
[2025-08-05 05:26:09,060][34856] Fps is (10 sec: 3278.3, 60 sec: 3686.4, 300 sec: 3464.5). Total num frames: 1257472. Throughput: 0: 922.5. Samples: 212186. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
|
4227 |
+
[2025-08-05 05:26:09,061][34856] Avg episode reward: [(0, '5.464')]
|
4228 |
+
[2025-08-05 05:26:11,997][34917] Updated weights for policy 0, policy_version 311 (0.0021)
|
4229 |
+
[2025-08-05 05:26:14,060][34856] Fps is (10 sec: 4096.0, 60 sec: 3686.4, 300 sec: 3477.2). Total num frames: 1277952. Throughput: 0: 921.3. Samples: 215324. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
|
4230 |
+
[2025-08-05 05:26:14,061][34856] Avg episode reward: [(0, '5.687')]
|
4231 |
+
[2025-08-05 05:26:14,062][34895] Saving new best policy, reward=5.687!
|
4232 |
+
[2025-08-05 05:26:19,060][34856] Fps is (10 sec: 3686.4, 60 sec: 3686.4, 300 sec: 3473.2). Total num frames: 1294336. Throughput: 0: 922.9. Samples: 219704. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
|
4233 |
+
[2025-08-05 05:26:19,061][34856] Avg episode reward: [(0, '5.381')]
|
4234 |
+
[2025-08-05 05:26:23,670][34917] Updated weights for policy 0, policy_version 321 (0.0032)
|
4235 |
+
[2025-08-05 05:26:24,060][34856] Fps is (10 sec: 3686.4, 60 sec: 3686.4, 300 sec: 3485.3). Total num frames: 1314816. Throughput: 0: 921.5. Samples: 225990. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0)
|
4236 |
+
[2025-08-05 05:26:24,061][34856] Avg episode reward: [(0, '5.418')]
|
4237 |
+
[2025-08-05 05:26:29,060][34856] Fps is (10 sec: 3686.4, 60 sec: 3618.1, 300 sec: 3481.3). Total num frames: 1331200. Throughput: 0: 926.4. Samples: 229210. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0)
|
4238 |
+
[2025-08-05 05:26:29,061][34856] Avg episode reward: [(0, '5.319')]
|
4239 |
+
[2025-08-05 05:26:34,060][34856] Fps is (10 sec: 3276.8, 60 sec: 3686.4, 300 sec: 3477.5). Total num frames: 1347584. Throughput: 0: 926.6. Samples: 233558. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
|
4240 |
+
[2025-08-05 05:26:34,061][34856] Avg episode reward: [(0, '5.394')]
|
4241 |
+
[2025-08-05 05:26:35,115][34917] Updated weights for policy 0, policy_version 331 (0.0027)
|
4242 |
+
[2025-08-05 05:26:39,060][34856] Fps is (10 sec: 4096.0, 60 sec: 3754.7, 300 sec: 3503.8). Total num frames: 1372160. Throughput: 0: 925.6. Samples: 239956. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
|
4243 |
+
[2025-08-05 05:26:39,061][34856] Avg episode reward: [(0, '5.038')]
|
4244 |
+
[2025-08-05 05:26:44,060][34856] Fps is (10 sec: 4096.0, 60 sec: 3686.5, 300 sec: 3499.7). Total num frames: 1388544. Throughput: 0: 923.4. Samples: 243032. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
|
4245 |
+
[2025-08-05 05:26:44,061][34856] Avg episode reward: [(0, '5.238')]
|
4246 |
+
[2025-08-05 05:26:46,160][34917] Updated weights for policy 0, policy_version 341 (0.0044)
|
4247 |
+
[2025-08-05 05:26:49,060][34856] Fps is (10 sec: 3276.8, 60 sec: 3686.4, 300 sec: 3495.8). Total num frames: 1404928. Throughput: 0: 924.0. Samples: 247342. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0)
|
4248 |
+
[2025-08-05 05:26:49,061][34856] Avg episode reward: [(0, '5.382')]
|
4249 |
+
[2025-08-05 05:26:54,060][34856] Fps is (10 sec: 3686.4, 60 sec: 3686.4, 300 sec: 3506.3). Total num frames: 1425408. Throughput: 0: 924.8. Samples: 253802. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0)
|
4250 |
+
[2025-08-05 05:26:54,061][34856] Avg episode reward: [(0, '5.461')]
|
4251 |
+
[2025-08-05 05:26:56,119][34917] Updated weights for policy 0, policy_version 351 (0.0033)
|
4252 |
+
[2025-08-05 05:26:59,060][34856] Fps is (10 sec: 4096.0, 60 sec: 3686.7, 300 sec: 3516.3). Total num frames: 1445888. Throughput: 0: 925.9. Samples: 256988. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0)
|
4253 |
+
[2025-08-05 05:26:59,068][34856] Avg episode reward: [(0, '5.522')]
|
4254 |
+
[2025-08-05 05:27:04,060][34856] Fps is (10 sec: 3686.3, 60 sec: 3754.7, 300 sec: 3554.5). Total num frames: 1462272. Throughput: 0: 924.5. Samples: 261308. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0)
|
4255 |
+
[2025-08-05 05:27:04,061][34856] Avg episode reward: [(0, '5.534')]
|
4256 |
+
[2025-08-05 05:27:07,745][34917] Updated weights for policy 0, policy_version 361 (0.0023)
|
4257 |
+
[2025-08-05 05:27:09,060][34856] Fps is (10 sec: 3686.4, 60 sec: 3754.7, 300 sec: 3623.9). Total num frames: 1482752. Throughput: 0: 927.8. Samples: 267740. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0)
|
4258 |
+
[2025-08-05 05:27:09,061][34856] Avg episode reward: [(0, '5.264')]
|
4259 |
+
[2025-08-05 05:27:14,060][34856] Fps is (10 sec: 3686.5, 60 sec: 3686.4, 300 sec: 3679.5). Total num frames: 1499136. Throughput: 0: 924.1. Samples: 270796. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
|
4260 |
+
[2025-08-05 05:27:14,061][34856] Avg episode reward: [(0, '5.588')]
|
4261 |
+
[2025-08-05 05:27:19,063][34856] Fps is (10 sec: 3275.9, 60 sec: 3686.2, 300 sec: 3665.5). Total num frames: 1515520. Throughput: 0: 923.5. Samples: 275118. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
|
4262 |
+
[2025-08-05 05:27:19,064][34856] Avg episode reward: [(0, '5.595')]
|
4263 |
+
[2025-08-05 05:27:19,240][34917] Updated weights for policy 0, policy_version 371 (0.0036)
|
4264 |
+
[2025-08-05 05:27:24,060][34856] Fps is (10 sec: 3686.4, 60 sec: 3686.4, 300 sec: 3665.6). Total num frames: 1536000. Throughput: 0: 925.3. Samples: 281594. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0)
|
4265 |
+
[2025-08-05 05:27:24,061][34856] Avg episode reward: [(0, '5.457')]
|
4266 |
+
[2025-08-05 05:27:29,061][34856] Fps is (10 sec: 4097.0, 60 sec: 3754.6, 300 sec: 3679.5). Total num frames: 1556480. Throughput: 0: 929.1. Samples: 284840. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0)
|
4267 |
+
[2025-08-05 05:27:29,061][34856] Avg episode reward: [(0, '5.678')]
|
4268 |
+
[2025-08-05 05:27:29,811][34917] Updated weights for policy 0, policy_version 381 (0.0033)
|
4269 |
+
[2025-08-05 05:27:34,060][34856] Fps is (10 sec: 3686.4, 60 sec: 3754.7, 300 sec: 3679.5). Total num frames: 1572864. Throughput: 0: 930.7. Samples: 289222. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0)
|
4270 |
+
[2025-08-05 05:27:34,061][34856] Avg episode reward: [(0, '6.238')]
|
4271 |
+
[2025-08-05 05:27:34,062][34895] Saving new best policy, reward=6.238!
|
4272 |
+
[2025-08-05 05:27:39,061][34856] Fps is (10 sec: 3686.4, 60 sec: 3686.4, 300 sec: 3693.3). Total num frames: 1593344. Throughput: 0: 928.1. Samples: 295568. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
|
4273 |
+
[2025-08-05 05:27:39,061][34856] Avg episode reward: [(0, '6.189')]
|
4274 |
+
[2025-08-05 05:27:39,068][34895] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000389_1593344.pth...
|
4275 |
+
[2025-08-05 05:27:39,210][34895] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000174_712704.pth
|
4276 |
+
[2025-08-05 05:27:40,836][34917] Updated weights for policy 0, policy_version 391 (0.0033)
|
4277 |
+
[2025-08-05 05:27:44,064][34856] Fps is (10 sec: 3685.2, 60 sec: 3686.2, 300 sec: 3693.3). Total num frames: 1609728. Throughput: 0: 923.0. Samples: 298526. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0)
|
4278 |
+
[2025-08-05 05:27:44,069][34856] Avg episode reward: [(0, '6.138')]
|
4279 |
+
[2025-08-05 05:27:49,060][34856] Fps is (10 sec: 3276.9, 60 sec: 3686.4, 300 sec: 3665.6). Total num frames: 1626112. Throughput: 0: 919.9. Samples: 302702. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0)
|
4280 |
+
[2025-08-05 05:27:49,061][34856] Avg episode reward: [(0, '5.672')]
|
4281 |
+
[2025-08-05 05:27:52,490][34917] Updated weights for policy 0, policy_version 401 (0.0031)
|
4282 |
+
[2025-08-05 05:27:54,060][34856] Fps is (10 sec: 3687.7, 60 sec: 3686.4, 300 sec: 3679.5). Total num frames: 1646592. Throughput: 0: 919.0. Samples: 309094. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
|
4283 |
+
[2025-08-05 05:27:54,061][34856] Avg episode reward: [(0, '5.513')]
|
4284 |
+
[2025-08-05 05:27:59,062][34856] Fps is (10 sec: 4095.2, 60 sec: 3686.3, 300 sec: 3693.3). Total num frames: 1667072. Throughput: 0: 921.6. Samples: 312272. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
|
4285 |
+
[2025-08-05 05:27:59,063][34856] Avg episode reward: [(0, '5.391')]
|
4286 |
+
[2025-08-05 05:28:03,851][34917] Updated weights for policy 0, policy_version 411 (0.0035)
|
4287 |
+
[2025-08-05 05:28:04,062][34856] Fps is (10 sec: 3685.8, 60 sec: 3686.3, 300 sec: 3679.4). Total num frames: 1683456. Throughput: 0: 921.7. Samples: 316592. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
|
4288 |
+
[2025-08-05 05:28:04,062][34856] Avg episode reward: [(0, '5.652')]
|
4289 |
+
[2025-08-05 05:28:09,060][34856] Fps is (10 sec: 3687.1, 60 sec: 3686.4, 300 sec: 3679.5). Total num frames: 1703936. Throughput: 0: 920.6. Samples: 323020. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0)
|
4290 |
+
[2025-08-05 05:28:09,061][34856] Avg episode reward: [(0, '6.088')]
|
4291 |
+
[2025-08-05 05:28:14,060][34856] Fps is (10 sec: 3686.9, 60 sec: 3686.4, 300 sec: 3679.5). Total num frames: 1720320. Throughput: 0: 916.7. Samples: 326092. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
|
4292 |
+
[2025-08-05 05:28:14,063][34856] Avg episode reward: [(0, '6.166')]
|
4293 |
+
[2025-08-05 05:28:14,708][34917] Updated weights for policy 0, policy_version 421 (0.0025)
|
4294 |
+
[2025-08-05 05:28:19,060][34856] Fps is (10 sec: 3276.8, 60 sec: 3686.6, 300 sec: 3665.6). Total num frames: 1736704. Throughput: 0: 916.3. Samples: 330454. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0)
|
4295 |
+
[2025-08-05 05:28:19,063][34856] Avg episode reward: [(0, '6.037')]
|
4296 |
+
[2025-08-05 05:28:24,060][34856] Fps is (10 sec: 3686.4, 60 sec: 3686.4, 300 sec: 3665.6). Total num frames: 1757184. Throughput: 0: 917.3. Samples: 336848. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0)
|
4297 |
+
[2025-08-05 05:28:24,061][34856] Avg episode reward: [(0, '5.830')]
|
4298 |
+
[2025-08-05 05:28:25,123][34917] Updated weights for policy 0, policy_version 431 (0.0022)
|
4299 |
+
[2025-08-05 05:28:29,060][34856] Fps is (10 sec: 4096.0, 60 sec: 3686.4, 300 sec: 3679.5). Total num frames: 1777664. Throughput: 0: 923.6. Samples: 340084. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0)
|
4300 |
+
[2025-08-05 05:28:29,061][34856] Avg episode reward: [(0, '6.213')]
|
4301 |
+
[2025-08-05 05:28:34,060][34856] Fps is (10 sec: 3686.4, 60 sec: 3686.4, 300 sec: 3665.6). Total num frames: 1794048. Throughput: 0: 929.1. Samples: 344510. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
|
4302 |
+
[2025-08-05 05:28:34,061][34856] Avg episode reward: [(0, '6.279')]
|
4303 |
+
[2025-08-05 05:28:34,062][34895] Saving new best policy, reward=6.279!
|
4304 |
+
[2025-08-05 05:28:36,700][34917] Updated weights for policy 0, policy_version 441 (0.0023)
|
4305 |
+
[2025-08-05 05:28:39,060][34856] Fps is (10 sec: 3686.4, 60 sec: 3686.4, 300 sec: 3679.5). Total num frames: 1814528. Throughput: 0: 929.0. Samples: 350898. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
|
4306 |
+
[2025-08-05 05:28:39,061][34856] Avg episode reward: [(0, '6.721')]
|
4307 |
+
[2025-08-05 05:28:39,069][34895] Saving new best policy, reward=6.721!
|
4308 |
+
[2025-08-05 05:28:44,062][34856] Fps is (10 sec: 3685.8, 60 sec: 3686.5, 300 sec: 3679.4). Total num frames: 1830912. Throughput: 0: 925.4. Samples: 353916. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
|
4309 |
+
[2025-08-05 05:28:44,063][34856] Avg episode reward: [(0, '6.771')]
|
4310 |
+
[2025-08-05 05:28:44,064][34895] Saving new best policy, reward=6.771!
|
4311 |
+
[2025-08-05 05:28:48,071][34917] Updated weights for policy 0, policy_version 451 (0.0040)
|
4312 |
+
[2025-08-05 05:28:49,060][34856] Fps is (10 sec: 3276.8, 60 sec: 3686.4, 300 sec: 3665.6). Total num frames: 1847296. Throughput: 0: 928.7. Samples: 358380. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0)
|
4313 |
+
[2025-08-05 05:28:49,061][34856] Avg episode reward: [(0, '6.853')]
|
4314 |
+
[2025-08-05 05:28:49,070][34895] Saving new best policy, reward=6.853!
|
4315 |
+
[2025-08-05 05:28:54,060][34856] Fps is (10 sec: 3277.4, 60 sec: 3618.1, 300 sec: 3651.7). Total num frames: 1863680. Throughput: 0: 882.5. Samples: 362732. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0)
|
4316 |
+
[2025-08-05 05:28:54,061][34856] Avg episode reward: [(0, '7.391')]
|
4317 |
+
[2025-08-05 05:28:54,062][34895] Saving new best policy, reward=7.391!
|
4318 |
+
[2025-08-05 05:28:59,060][34856] Fps is (10 sec: 3276.8, 60 sec: 3550.0, 300 sec: 3651.7). Total num frames: 1880064. Throughput: 0: 883.6. Samples: 365852. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0)
|
4319 |
+
[2025-08-05 05:28:59,061][34856] Avg episode reward: [(0, '7.353')]
|
4320 |
+
[2025-08-05 05:29:01,616][34917] Updated weights for policy 0, policy_version 461 (0.0030)
|
4321 |
+
[2025-08-05 05:29:04,060][34856] Fps is (10 sec: 3276.8, 60 sec: 3550.0, 300 sec: 3637.8). Total num frames: 1896448. Throughput: 0: 886.4. Samples: 370344. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0)
|
4322 |
+
[2025-08-05 05:29:04,061][34856] Avg episode reward: [(0, '7.967')]
|
4323 |
+
[2025-08-05 05:29:04,062][34895] Saving new best policy, reward=7.967!
|
4324 |
+
[2025-08-05 05:29:09,061][34856] Fps is (10 sec: 3686.3, 60 sec: 3549.9, 300 sec: 3651.7). Total num frames: 1916928. Throughput: 0: 886.3. Samples: 376734. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
|
4325 |
+
[2025-08-05 05:29:09,061][34856] Avg episode reward: [(0, '8.134')]
|
4326 |
+
[2025-08-05 05:29:09,068][34895] Saving new best policy, reward=8.134!
|
4327 |
+
[2025-08-05 05:29:11,188][34917] Updated weights for policy 0, policy_version 471 (0.0028)
|
4328 |
+
[2025-08-05 05:29:14,063][34856] Fps is (10 sec: 3685.4, 60 sec: 3549.7, 300 sec: 3651.7). Total num frames: 1933312. Throughput: 0: 880.1. Samples: 379692. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
|
4329 |
+
[2025-08-05 05:29:14,064][34856] Avg episode reward: [(0, '8.261')]
|
4330 |
+
[2025-08-05 05:29:14,065][34895] Saving new best policy, reward=8.261!
|
4331 |
+
[2025-08-05 05:29:19,060][34856] Fps is (10 sec: 3686.5, 60 sec: 3618.1, 300 sec: 3651.7). Total num frames: 1953792. Throughput: 0: 882.9. Samples: 384242. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0)
|
4332 |
+
[2025-08-05 05:29:19,061][34856] Avg episode reward: [(0, '8.020')]
|
4333 |
+
[2025-08-05 05:29:22,730][34917] Updated weights for policy 0, policy_version 481 (0.0030)
|
4334 |
+
[2025-08-05 05:29:24,060][34856] Fps is (10 sec: 4097.1, 60 sec: 3618.1, 300 sec: 3651.7). Total num frames: 1974272. Throughput: 0: 886.1. Samples: 390774. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
|
4335 |
+
[2025-08-05 05:29:24,061][34856] Avg episode reward: [(0, '8.824')]
|
4336 |
+
[2025-08-05 05:29:24,062][34895] Saving new best policy, reward=8.824!
|
4337 |
+
[2025-08-05 05:29:29,060][34856] Fps is (10 sec: 3686.4, 60 sec: 3549.9, 300 sec: 3651.7). Total num frames: 1990656. Throughput: 0: 885.8. Samples: 393774. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
|
4338 |
+
[2025-08-05 05:29:29,061][34856] Avg episode reward: [(0, '9.422')]
|
4339 |
+
[2025-08-05 05:29:29,067][34895] Saving new best policy, reward=9.422!
|
4340 |
+
[2025-08-05 05:29:34,060][34856] Fps is (10 sec: 3276.8, 60 sec: 3549.9, 300 sec: 3637.8). Total num frames: 2007040. Throughput: 0: 887.5. Samples: 398316. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0)
|
4341 |
+
[2025-08-05 05:29:34,061][34856] Avg episode reward: [(0, '9.517')]
|
4342 |
+
[2025-08-05 05:29:34,062][34895] Saving new best policy, reward=9.517!
|
4343 |
+
[2025-08-05 05:29:34,295][34917] Updated weights for policy 0, policy_version 491 (0.0032)
|
4344 |
+
[2025-08-05 05:29:39,060][34856] Fps is (10 sec: 4096.0, 60 sec: 3618.1, 300 sec: 3665.6). Total num frames: 2031616. Throughput: 0: 934.1. Samples: 404766. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
|
4345 |
+
[2025-08-05 05:29:39,061][34856] Avg episode reward: [(0, '9.634')]
|
4346 |
+
[2025-08-05 05:29:39,068][34895] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000496_2031616.pth...
|
4347 |
+
[2025-08-05 05:29:39,205][34895] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000281_1150976.pth
|
4348 |
+
[2025-08-05 05:29:39,217][34895] Saving new best policy, reward=9.634!
|
4349 |
+
[2025-08-05 05:29:44,060][34856] Fps is (10 sec: 4096.0, 60 sec: 3618.2, 300 sec: 3679.5). Total num frames: 2048000. Throughput: 0: 928.7. Samples: 407644. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
|
4350 |
+
[2025-08-05 05:29:44,062][34856] Avg episode reward: [(0, '9.544')]
|
4351 |
+
[2025-08-05 05:29:45,402][34917] Updated weights for policy 0, policy_version 501 (0.0029)
|
4352 |
+
[2025-08-05 05:29:49,060][34856] Fps is (10 sec: 3276.8, 60 sec: 3618.1, 300 sec: 3665.6). Total num frames: 2064384. Throughput: 0: 930.5. Samples: 412216. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0)
|
4353 |
+
[2025-08-05 05:29:49,061][34856] Avg episode reward: [(0, '9.932')]
|
4354 |
+
[2025-08-05 05:29:49,070][34895] Saving new best policy, reward=9.932!
|
4355 |
+
[2025-08-05 05:29:54,060][34856] Fps is (10 sec: 3686.4, 60 sec: 3686.4, 300 sec: 3665.6). Total num frames: 2084864. Throughput: 0: 933.3. Samples: 418732. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
|
4356 |
+
[2025-08-05 05:29:54,061][34856] Avg episode reward: [(0, '11.133')]
|
4357 |
+
[2025-08-05 05:29:54,061][34895] Saving new best policy, reward=11.133!
|
4358 |
+
[2025-08-05 05:29:55,274][34917] Updated weights for policy 0, policy_version 511 (0.0021)
|
4359 |
+
[2025-08-05 05:29:59,060][34856] Fps is (10 sec: 3686.4, 60 sec: 3686.4, 300 sec: 3679.5). Total num frames: 2101248. Throughput: 0: 933.7. Samples: 421704. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
|
4360 |
+
[2025-08-05 05:29:59,061][34856] Avg episode reward: [(0, '10.797')]
|
4361 |
+
[2025-08-05 05:30:04,060][34856] Fps is (10 sec: 3686.4, 60 sec: 3754.7, 300 sec: 3679.5). Total num frames: 2121728. Throughput: 0: 936.0. Samples: 426364. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
|
4362 |
+
[2025-08-05 05:30:04,061][34856] Avg episode reward: [(0, '10.011')]
|
4363 |
+
[2025-08-05 05:30:06,840][34917] Updated weights for policy 0, policy_version 521 (0.0040)
|
4364 |
+
[2025-08-05 05:30:09,065][34856] Fps is (10 sec: 4094.2, 60 sec: 3754.4, 300 sec: 3679.4). Total num frames: 2142208. Throughput: 0: 935.4. Samples: 432870. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
|
4365 |
+
[2025-08-05 05:30:09,065][34856] Avg episode reward: [(0, '8.859')]
|
4366 |
+
[2025-08-05 05:30:14,060][34856] Fps is (10 sec: 3686.4, 60 sec: 3754.8, 300 sec: 3679.5). Total num frames: 2158592. Throughput: 0: 933.7. Samples: 435790. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
|
4367 |
+
[2025-08-05 05:30:14,061][34856] Avg episode reward: [(0, '8.819')]
|
4368 |
+
[2025-08-05 05:30:18,088][34917] Updated weights for policy 0, policy_version 531 (0.0024)
|
4369 |
+
[2025-08-05 05:30:19,065][34856] Fps is (10 sec: 3686.4, 60 sec: 3754.4, 300 sec: 3679.4). Total num frames: 2179072. Throughput: 0: 936.4. Samples: 440458. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
|
4370 |
+
[2025-08-05 05:30:19,065][34856] Avg episode reward: [(0, '9.009')]
|
4371 |
+
[2025-08-05 05:30:24,060][34856] Fps is (10 sec: 4096.0, 60 sec: 3754.7, 300 sec: 3679.5). Total num frames: 2199552. Throughput: 0: 937.0. Samples: 446930. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0)
|
4372 |
+
[2025-08-05 05:30:24,061][34856] Avg episode reward: [(0, '9.064')]
|
4373 |
+
[2025-08-05 05:30:28,542][34917] Updated weights for policy 0, policy_version 541 (0.0025)
|
4374 |
+
[2025-08-05 05:30:29,060][34856] Fps is (10 sec: 3688.0, 60 sec: 3754.7, 300 sec: 3693.3). Total num frames: 2215936. Throughput: 0: 936.7. Samples: 449794. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
|
4375 |
+
[2025-08-05 05:30:29,063][34856] Avg episode reward: [(0, '10.455')]
|
4376 |
+
[2025-08-05 05:30:34,060][34856] Fps is (10 sec: 3276.8, 60 sec: 3754.7, 300 sec: 3679.5). Total num frames: 2232320. Throughput: 0: 940.4. Samples: 454534. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
|
4377 |
+
[2025-08-05 05:30:34,061][34856] Avg episode reward: [(0, '10.940')]
|
4378 |
+
[2025-08-05 05:30:38,806][34917] Updated weights for policy 0, policy_version 551 (0.0023)
|
4379 |
+
[2025-08-05 05:30:39,060][34856] Fps is (10 sec: 4096.0, 60 sec: 3754.7, 300 sec: 3693.4). Total num frames: 2256896. Throughput: 0: 942.5. Samples: 461146. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
|
4380 |
+
[2025-08-05 05:30:39,061][34856] Avg episode reward: [(0, '11.984')]
|
4381 |
+
[2025-08-05 05:30:39,069][34895] Saving new best policy, reward=11.984!
|
4382 |
+
[2025-08-05 05:30:44,062][34856] Fps is (10 sec: 3685.8, 60 sec: 3686.3, 300 sec: 3679.4). Total num frames: 2269184. Throughput: 0: 937.5. Samples: 463892. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0)
|
4383 |
+
[2025-08-05 05:30:44,064][34856] Avg episode reward: [(0, '11.542')]
|
4384 |
+
[2025-08-05 05:30:49,060][34856] Fps is (10 sec: 3276.8, 60 sec: 3754.7, 300 sec: 3679.5). Total num frames: 2289664. Throughput: 0: 940.3. Samples: 468676. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0)
|
4385 |
+
[2025-08-05 05:30:49,061][34856] Avg episode reward: [(0, '11.607')]
|
4386 |
+
[2025-08-05 05:30:50,237][34917] Updated weights for policy 0, policy_version 561 (0.0034)
|
4387 |
+
[2025-08-05 05:30:54,060][34856] Fps is (10 sec: 4096.7, 60 sec: 3754.7, 300 sec: 3679.5). Total num frames: 2310144. Throughput: 0: 941.5. Samples: 475232. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0)
|
4388 |
+
[2025-08-05 05:30:54,061][34856] Avg episode reward: [(0, '12.533')]
|
4389 |
+
[2025-08-05 05:30:54,149][34895] Saving new best policy, reward=12.533!
|
4390 |
+
[2025-08-05 05:30:59,060][34856] Fps is (10 sec: 3686.4, 60 sec: 3754.7, 300 sec: 3693.3). Total num frames: 2326528. Throughput: 0: 939.7. Samples: 478078. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
|
4391 |
+
[2025-08-05 05:30:59,061][34856] Avg episode reward: [(0, '12.707')]
|
4392 |
+
[2025-08-05 05:30:59,067][34895] Saving new best policy, reward=12.707!
|
4393 |
+
[2025-08-05 05:31:01,829][34917] Updated weights for policy 0, policy_version 571 (0.0022)
|
4394 |
+
[2025-08-05 05:31:04,060][34856] Fps is (10 sec: 3686.4, 60 sec: 3754.7, 300 sec: 3693.3). Total num frames: 2347008. Throughput: 0: 941.5. Samples: 482820. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
|
4395 |
+
[2025-08-05 05:31:04,061][34856] Avg episode reward: [(0, '13.571')]
|
4396 |
+
[2025-08-05 05:31:04,062][34895] Saving new best policy, reward=13.571!
|
4397 |
+
[2025-08-05 05:31:09,060][34856] Fps is (10 sec: 4096.0, 60 sec: 3754.9, 300 sec: 3693.3). Total num frames: 2367488. Throughput: 0: 939.5. Samples: 489208. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0)
|
4398 |
+
[2025-08-05 05:31:09,061][34856] Avg episode reward: [(0, '14.390')]
|
4399 |
+
[2025-08-05 05:31:09,067][34895] Saving new best policy, reward=14.390!
|
4400 |
+
[2025-08-05 05:31:11,687][34917] Updated weights for policy 0, policy_version 581 (0.0032)
|
4401 |
+
[2025-08-05 05:31:14,060][34856] Fps is (10 sec: 3686.3, 60 sec: 3754.7, 300 sec: 3693.3). Total num frames: 2383872. Throughput: 0: 937.0. Samples: 491960. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0)
|
4402 |
+
[2025-08-05 05:31:14,061][34856] Avg episode reward: [(0, '13.731')]
|
4403 |
+
[2025-08-05 05:31:19,060][34856] Fps is (10 sec: 3276.8, 60 sec: 3686.7, 300 sec: 3679.5). Total num frames: 2400256. Throughput: 0: 937.6. Samples: 496726. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0)
|
4404 |
+
[2025-08-05 05:31:19,061][34856] Avg episode reward: [(0, '14.313')]
|
4405 |
+
[2025-08-05 05:31:22,943][34917] Updated weights for policy 0, policy_version 591 (0.0028)
|
4406 |
+
[2025-08-05 05:31:24,060][34856] Fps is (10 sec: 4096.1, 60 sec: 3754.7, 300 sec: 3707.2). Total num frames: 2424832. Throughput: 0: 934.2. Samples: 503184. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0)
|
4407 |
+
[2025-08-05 05:31:24,061][34856] Avg episode reward: [(0, '14.669')]
|
4408 |
+
[2025-08-05 05:31:24,062][34895] Saving new best policy, reward=14.669!
|
4409 |
+
[2025-08-05 05:31:29,060][34856] Fps is (10 sec: 4096.0, 60 sec: 3754.7, 300 sec: 3707.2). Total num frames: 2441216. Throughput: 0: 934.8. Samples: 505956. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
|
4410 |
+
[2025-08-05 05:31:29,061][34856] Avg episode reward: [(0, '15.383')]
|
4411 |
+
[2025-08-05 05:31:29,070][34895] Saving new best policy, reward=15.383!
|
4412 |
+
[2025-08-05 05:31:34,060][34856] Fps is (10 sec: 3276.8, 60 sec: 3754.7, 300 sec: 3679.5). Total num frames: 2457600. Throughput: 0: 934.7. Samples: 510738. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0)
|
4413 |
+
[2025-08-05 05:31:34,061][34856] Avg episode reward: [(0, '15.847')]
|
4414 |
+
[2025-08-05 05:31:34,062][34895] Saving new best policy, reward=15.847!
|
4415 |
+
[2025-08-05 05:31:34,291][34917] Updated weights for policy 0, policy_version 601 (0.0028)
|
4416 |
+
[2025-08-05 05:31:39,060][34856] Fps is (10 sec: 3686.4, 60 sec: 3686.4, 300 sec: 3693.3). Total num frames: 2478080. Throughput: 0: 931.6. Samples: 517156. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0)
|
4417 |
+
[2025-08-05 05:31:39,061][34856] Avg episode reward: [(0, '18.223')]
|
4418 |
+
[2025-08-05 05:31:39,085][34895] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000606_2482176.pth...
|
4419 |
+
[2025-08-05 05:31:39,211][34895] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000389_1593344.pth
|
4420 |
+
[2025-08-05 05:31:39,219][34895] Saving new best policy, reward=18.223!
|
4421 |
+
[2025-08-05 05:31:44,063][34856] Fps is (10 sec: 3685.4, 60 sec: 3754.6, 300 sec: 3693.3). Total num frames: 2494464. Throughput: 0: 925.9. Samples: 519744. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
|
4422 |
+
[2025-08-05 05:31:44,064][34856] Avg episode reward: [(0, '18.628')]
|
4423 |
+
[2025-08-05 05:31:44,066][34895] Saving new best policy, reward=18.628!
|
4424 |
+
[2025-08-05 05:31:46,016][34917] Updated weights for policy 0, policy_version 611 (0.0035)
|
4425 |
+
[2025-08-05 05:31:49,060][34856] Fps is (10 sec: 3686.4, 60 sec: 3754.7, 300 sec: 3693.3). Total num frames: 2514944. Throughput: 0: 925.5. Samples: 524468. Policy #0 lag: (min: 0.0, avg: 0.3, max: 2.0)
|
4426 |
+
[2025-08-05 05:31:49,061][34856] Avg episode reward: [(0, '19.526')]
|
4427 |
+
[2025-08-05 05:31:49,067][34895] Saving new best policy, reward=19.526!
|
4428 |
+
[2025-08-05 05:31:54,060][34856] Fps is (10 sec: 4097.1, 60 sec: 3754.7, 300 sec: 3693.3). Total num frames: 2535424. Throughput: 0: 927.5. Samples: 530944. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0)
|
4429 |
+
[2025-08-05 05:31:54,061][34856] Avg episode reward: [(0, '18.606')]
|
4430 |
+
[2025-08-05 05:31:55,533][34917] Updated weights for policy 0, policy_version 621 (0.0017)
|
4431 |
+
[2025-08-05 05:31:59,062][34856] Fps is (10 sec: 3685.7, 60 sec: 3754.6, 300 sec: 3693.3). Total num frames: 2551808. Throughput: 0: 927.8. Samples: 533714. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0)
|
4432 |
+
[2025-08-05 05:31:59,063][34856] Avg episode reward: [(0, '20.046')]
|
4433 |
+
[2025-08-05 05:31:59,073][34895] Saving new best policy, reward=20.046!
|
4434 |
+
[2025-08-05 05:32:04,060][34856] Fps is (10 sec: 3276.8, 60 sec: 3686.4, 300 sec: 3679.5). Total num frames: 2568192. Throughput: 0: 928.6. Samples: 538512. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0)
|
4435 |
+
[2025-08-05 05:32:04,061][34856] Avg episode reward: [(0, '18.573')]
|
4436 |
+
[2025-08-05 05:32:07,145][34917] Updated weights for policy 0, policy_version 631 (0.0028)
|
4437 |
+
[2025-08-05 05:32:09,060][34856] Fps is (10 sec: 4096.8, 60 sec: 3754.7, 300 sec: 3707.2). Total num frames: 2592768. Throughput: 0: 931.6. Samples: 545108. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
|
4438 |
+
[2025-08-05 05:32:09,061][34856] Avg episode reward: [(0, '19.164')]
|
4439 |
+
[2025-08-05 05:32:14,060][34856] Fps is (10 sec: 3686.4, 60 sec: 3686.4, 300 sec: 3693.4). Total num frames: 2605056. Throughput: 0: 930.3. Samples: 547818. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
|
4440 |
+
[2025-08-05 05:32:14,061][34856] Avg episode reward: [(0, '19.171')]
|
4441 |
+
[2025-08-05 05:32:18,343][34917] Updated weights for policy 0, policy_version 641 (0.0030)
|
4442 |
+
[2025-08-05 05:32:19,060][34856] Fps is (10 sec: 3276.8, 60 sec: 3754.7, 300 sec: 3693.3). Total num frames: 2625536. Throughput: 0: 932.3. Samples: 552692. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0)
|
4443 |
+
[2025-08-05 05:32:19,061][34856] Avg episode reward: [(0, '18.097')]
|
4444 |
+
[2025-08-05 05:32:24,060][34856] Fps is (10 sec: 4505.7, 60 sec: 3754.7, 300 sec: 3707.2). Total num frames: 2650112. Throughput: 0: 936.1. Samples: 559282. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
|
4445 |
+
[2025-08-05 05:32:24,061][34856] Avg episode reward: [(0, '19.720')]
|
4446 |
+
[2025-08-05 05:32:29,033][34917] Updated weights for policy 0, policy_version 651 (0.0017)
|
4447 |
+
[2025-08-05 05:32:29,061][34856] Fps is (10 sec: 4095.6, 60 sec: 3754.6, 300 sec: 3707.2). Total num frames: 2666496. Throughput: 0: 939.5. Samples: 562022. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0)
|
4448 |
+
[2025-08-05 05:32:29,064][34856] Avg episode reward: [(0, '18.688')]
|
4449 |
+
[2025-08-05 05:32:34,060][34856] Fps is (10 sec: 3276.8, 60 sec: 3754.7, 300 sec: 3693.3). Total num frames: 2682880. Throughput: 0: 943.7. Samples: 566936. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
|
4450 |
+
[2025-08-05 05:32:34,061][34856] Avg episode reward: [(0, '18.289')]
|
4451 |
+
[2025-08-05 05:32:39,060][34856] Fps is (10 sec: 3686.7, 60 sec: 3754.7, 300 sec: 3707.3). Total num frames: 2703360. Throughput: 0: 944.8. Samples: 573462. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
|
4452 |
+
[2025-08-05 05:32:39,061][34856] Avg episode reward: [(0, '18.502')]
|
4453 |
+
[2025-08-05 05:32:39,288][34917] Updated weights for policy 0, policy_version 661 (0.0033)
|
4454 |
+
[2025-08-05 05:32:44,060][34856] Fps is (10 sec: 3686.4, 60 sec: 3754.8, 300 sec: 3707.2). Total num frames: 2719744. Throughput: 0: 942.6. Samples: 576128. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
|
4455 |
+
[2025-08-05 05:32:44,062][34856] Avg episode reward: [(0, '19.466')]
|
4456 |
+
[2025-08-05 05:32:49,060][34856] Fps is (10 sec: 3686.4, 60 sec: 3754.7, 300 sec: 3707.2). Total num frames: 2740224. Throughput: 0: 944.4. Samples: 581010. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
|
4457 |
+
[2025-08-05 05:32:49,061][34856] Avg episode reward: [(0, '18.213')]
|
4458 |
+
[2025-08-05 05:32:50,563][34917] Updated weights for policy 0, policy_version 671 (0.0027)
|
4459 |
+
[2025-08-05 05:32:54,060][34856] Fps is (10 sec: 4096.0, 60 sec: 3754.7, 300 sec: 3707.3). Total num frames: 2760704. Throughput: 0: 941.8. Samples: 587490. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
|
4460 |
+
[2025-08-05 05:32:54,061][34856] Avg episode reward: [(0, '19.987')]
|
4461 |
+
[2025-08-05 05:32:59,060][34856] Fps is (10 sec: 3276.8, 60 sec: 3686.5, 300 sec: 3693.4). Total num frames: 2772992. Throughput: 0: 938.1. Samples: 590032. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
|
4462 |
+
[2025-08-05 05:32:59,063][34856] Avg episode reward: [(0, '19.896')]
|
4463 |
+
[2025-08-05 05:33:03,939][34917] Updated weights for policy 0, policy_version 681 (0.0034)
|
4464 |
+
[2025-08-05 05:33:04,060][34856] Fps is (10 sec: 2867.2, 60 sec: 3686.4, 300 sec: 3679.5). Total num frames: 2789376. Throughput: 0: 900.2. Samples: 593200. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
|
4465 |
+
[2025-08-05 05:33:04,061][34856] Avg episode reward: [(0, '20.100')]
|
4466 |
+
[2025-08-05 05:33:04,062][34895] Saving new best policy, reward=20.100!
|
4467 |
+
[2025-08-05 05:33:09,060][34856] Fps is (10 sec: 3686.4, 60 sec: 3618.1, 300 sec: 3693.3). Total num frames: 2809856. Throughput: 0: 893.5. Samples: 599490. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
|
4468 |
+
[2025-08-05 05:33:09,061][34856] Avg episode reward: [(0, '19.561')]
|
4469 |
+
[2025-08-05 05:33:14,067][34856] Fps is (10 sec: 3683.9, 60 sec: 3686.0, 300 sec: 3693.3). Total num frames: 2826240. Throughput: 0: 904.1. Samples: 602712. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
|
4470 |
+
[2025-08-05 05:33:14,068][34856] Avg episode reward: [(0, '21.468')]
|
4471 |
+
[2025-08-05 05:33:14,068][34895] Saving new best policy, reward=21.468!
|
4472 |
+
[2025-08-05 05:33:14,075][34917] Updated weights for policy 0, policy_version 691 (0.0026)
|
4473 |
+
[2025-08-05 05:33:19,060][34856] Fps is (10 sec: 3276.8, 60 sec: 3618.1, 300 sec: 3679.5). Total num frames: 2842624. Throughput: 0: 890.3. Samples: 607000. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
|
4474 |
+
[2025-08-05 05:33:19,061][34856] Avg episode reward: [(0, '21.684')]
|
4475 |
+
[2025-08-05 05:33:19,066][34895] Saving new best policy, reward=21.684!
|
4476 |
+
[2025-08-05 05:33:24,060][34856] Fps is (10 sec: 4098.8, 60 sec: 3618.1, 300 sec: 3693.3). Total num frames: 2867200. Throughput: 0: 888.8. Samples: 613458. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
|
4477 |
+
[2025-08-05 05:33:24,061][34856] Avg episode reward: [(0, '21.212')]
|
4478 |
+
[2025-08-05 05:33:25,012][34917] Updated weights for policy 0, policy_version 701 (0.0029)
|
4479 |
+
[2025-08-05 05:33:29,064][34856] Fps is (10 sec: 4094.3, 60 sec: 3617.9, 300 sec: 3693.3). Total num frames: 2883584. Throughput: 0: 901.0. Samples: 616678. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
|
4480 |
+
[2025-08-05 05:33:29,065][34856] Avg episode reward: [(0, '21.668')]
|
4481 |
+
[2025-08-05 05:33:34,060][34856] Fps is (10 sec: 3276.8, 60 sec: 3618.1, 300 sec: 3679.5). Total num frames: 2899968. Throughput: 0: 891.6. Samples: 621134. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
|
4482 |
+
[2025-08-05 05:33:34,061][34856] Avg episode reward: [(0, '20.431')]
|
4483 |
+
[2025-08-05 05:33:36,538][34917] Updated weights for policy 0, policy_version 711 (0.0027)
|
4484 |
+
[2025-08-05 05:33:39,061][34856] Fps is (10 sec: 3687.6, 60 sec: 3618.1, 300 sec: 3693.4). Total num frames: 2920448. Throughput: 0: 891.1. Samples: 627590. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
|
4485 |
+
[2025-08-05 05:33:39,062][34856] Avg episode reward: [(0, '19.447')]
|
4486 |
+
[2025-08-05 05:33:39,071][34895] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000713_2920448.pth...
|
4487 |
+
[2025-08-05 05:33:39,199][34895] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000496_2031616.pth
|
4488 |
+
[2025-08-05 05:33:44,061][34856] Fps is (10 sec: 4095.8, 60 sec: 3686.4, 300 sec: 3707.2). Total num frames: 2940928. Throughput: 0: 906.2. Samples: 630810. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
|
4489 |
+
[2025-08-05 05:33:44,061][34856] Avg episode reward: [(0, '17.275')]
|
4490 |
+
[2025-08-05 05:33:48,130][34917] Updated weights for policy 0, policy_version 721 (0.0063)
|
4491 |
+
[2025-08-05 05:33:49,060][34856] Fps is (10 sec: 3686.7, 60 sec: 3618.1, 300 sec: 3707.2). Total num frames: 2957312. Throughput: 0: 933.0. Samples: 635184. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
|
4492 |
+
[2025-08-05 05:33:49,061][34856] Avg episode reward: [(0, '15.179')]
|
4493 |
+
[2025-08-05 05:33:54,060][34856] Fps is (10 sec: 3686.6, 60 sec: 3618.1, 300 sec: 3721.1). Total num frames: 2977792. Throughput: 0: 938.7. Samples: 641732. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
|
4494 |
+
[2025-08-05 05:33:54,061][34856] Avg episode reward: [(0, '15.422')]
|
4495 |
+
[2025-08-05 05:33:57,444][34917] Updated weights for policy 0, policy_version 731 (0.0045)
|
4496 |
+
[2025-08-05 05:33:59,061][34856] Fps is (10 sec: 4095.9, 60 sec: 3754.6, 300 sec: 3735.0). Total num frames: 2998272. Throughput: 0: 939.8. Samples: 644996. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
|
4497 |
+
[2025-08-05 05:33:59,061][34856] Avg episode reward: [(0, '16.137')]
|
4498 |
+
[2025-08-05 05:34:04,060][34856] Fps is (10 sec: 3686.4, 60 sec: 3754.7, 300 sec: 3721.1). Total num frames: 3014656. Throughput: 0: 943.4. Samples: 649452. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0)
|
4499 |
+
[2025-08-05 05:34:04,061][34856] Avg episode reward: [(0, '16.393')]
|
4500 |
+
[2025-08-05 05:34:08,591][34917] Updated weights for policy 0, policy_version 741 (0.0040)
|
4501 |
+
[2025-08-05 05:34:09,060][34856] Fps is (10 sec: 3686.5, 60 sec: 3754.7, 300 sec: 3735.0). Total num frames: 3035136. Throughput: 0: 942.7. Samples: 655880. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0)
|
4502 |
+
[2025-08-05 05:34:09,061][34856] Avg episode reward: [(0, '18.219')]
|
4503 |
+
[2025-08-05 05:34:14,060][34856] Fps is (10 sec: 3686.4, 60 sec: 3755.1, 300 sec: 3721.1). Total num frames: 3051520. Throughput: 0: 944.3. Samples: 659166. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
|
4504 |
+
[2025-08-05 05:34:14,061][34856] Avg episode reward: [(0, '18.248')]
|
4505 |
+
[2025-08-05 05:34:19,061][34856] Fps is (10 sec: 3276.7, 60 sec: 3754.7, 300 sec: 3707.2). Total num frames: 3067904. Throughput: 0: 942.8. Samples: 663562. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0)
|
4506 |
+
[2025-08-05 05:34:19,061][34856] Avg episode reward: [(0, '19.402')]
|
4507 |
+
[2025-08-05 05:34:20,160][34917] Updated weights for policy 0, policy_version 751 (0.0027)
|
4508 |
+
[2025-08-05 05:34:24,060][34856] Fps is (10 sec: 4096.0, 60 sec: 3754.7, 300 sec: 3735.0). Total num frames: 3092480. Throughput: 0: 944.3. Samples: 670082. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
|
4509 |
+
[2025-08-05 05:34:24,061][34856] Avg episode reward: [(0, '20.228')]
|
4510 |
+
[2025-08-05 05:34:29,060][34856] Fps is (10 sec: 4096.1, 60 sec: 3754.9, 300 sec: 3735.0). Total num frames: 3108864. Throughput: 0: 944.1. Samples: 673292. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
|
4511 |
+
[2025-08-05 05:34:29,061][34856] Avg episode reward: [(0, '19.687')]
|
4512 |
+
[2025-08-05 05:34:31,159][34917] Updated weights for policy 0, policy_version 761 (0.0039)
|
4513 |
+
[2025-08-05 05:34:34,060][34856] Fps is (10 sec: 3276.8, 60 sec: 3754.7, 300 sec: 3707.2). Total num frames: 3125248. Throughput: 0: 944.8. Samples: 677702. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
|
4514 |
+
[2025-08-05 05:34:34,061][34856] Avg episode reward: [(0, '19.064')]
|
4515 |
+
[2025-08-05 05:34:39,060][34856] Fps is (10 sec: 4096.0, 60 sec: 3823.0, 300 sec: 3735.0). Total num frames: 3149824. Throughput: 0: 944.5. Samples: 684236. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
|
4516 |
+
[2025-08-05 05:34:39,061][34856] Avg episode reward: [(0, '19.597')]
|
4517 |
+
[2025-08-05 05:34:40,894][34917] Updated weights for policy 0, policy_version 771 (0.0023)
|
4518 |
+
[2025-08-05 05:34:44,063][34856] Fps is (10 sec: 4094.8, 60 sec: 3754.5, 300 sec: 3735.0). Total num frames: 3166208. Throughput: 0: 944.5. Samples: 687500. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
|
4519 |
+
[2025-08-05 05:34:44,064][34856] Avg episode reward: [(0, '17.663')]
|
4520 |
+
[2025-08-05 05:34:49,060][34856] Fps is (10 sec: 3276.8, 60 sec: 3754.7, 300 sec: 3721.1). Total num frames: 3182592. Throughput: 0: 940.5. Samples: 691776. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0)
|
4521 |
+
[2025-08-05 05:34:49,061][34856] Avg episode reward: [(0, '17.406')]
|
4522 |
+
[2025-08-05 05:34:52,391][34917] Updated weights for policy 0, policy_version 781 (0.0019)
|
4523 |
+
[2025-08-05 05:34:54,060][34856] Fps is (10 sec: 3687.5, 60 sec: 3754.7, 300 sec: 3735.0). Total num frames: 3203072. Throughput: 0: 942.8. Samples: 698306. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
|
4524 |
+
[2025-08-05 05:34:54,061][34856] Avg episode reward: [(0, '17.654')]
|
4525 |
+
[2025-08-05 05:34:59,060][34856] Fps is (10 sec: 4096.0, 60 sec: 3754.7, 300 sec: 3735.0). Total num frames: 3223552. Throughput: 0: 943.2. Samples: 701610. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0)
|
4526 |
+
[2025-08-05 05:34:59,061][34856] Avg episode reward: [(0, '16.206')]
|
4527 |
+
[2025-08-05 05:35:03,650][34917] Updated weights for policy 0, policy_version 791 (0.0032)
|
4528 |
+
[2025-08-05 05:35:04,060][34856] Fps is (10 sec: 3686.4, 60 sec: 3754.7, 300 sec: 3721.2). Total num frames: 3239936. Throughput: 0: 942.6. Samples: 705978. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0)
|
4529 |
+
[2025-08-05 05:35:04,061][34856] Avg episode reward: [(0, '15.946')]
|
4530 |
+
[2025-08-05 05:35:09,060][34856] Fps is (10 sec: 3686.4, 60 sec: 3754.7, 300 sec: 3735.0). Total num frames: 3260416. Throughput: 0: 943.1. Samples: 712520. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0)
|
4531 |
+
[2025-08-05 05:35:09,061][34856] Avg episode reward: [(0, '16.495')]
|
4532 |
+
[2025-08-05 05:35:13,695][34917] Updated weights for policy 0, policy_version 801 (0.0020)
|
4533 |
+
[2025-08-05 05:35:14,060][34856] Fps is (10 sec: 4096.0, 60 sec: 3822.9, 300 sec: 3735.1). Total num frames: 3280896. Throughput: 0: 944.8. Samples: 715810. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
|
4534 |
+
[2025-08-05 05:35:14,065][34856] Avg episode reward: [(0, '15.999')]
|
4535 |
+
[2025-08-05 05:35:19,060][34856] Fps is (10 sec: 3686.4, 60 sec: 3822.9, 300 sec: 3721.1). Total num frames: 3297280. Throughput: 0: 943.6. Samples: 720162. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
|
4536 |
+
[2025-08-05 05:35:19,061][34856] Avg episode reward: [(0, '17.446')]
|
4537 |
+
[2025-08-05 05:35:24,060][34856] Fps is (10 sec: 3686.4, 60 sec: 3754.7, 300 sec: 3735.0). Total num frames: 3317760. Throughput: 0: 944.2. Samples: 726724. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
|
4538 |
+
[2025-08-05 05:35:24,061][34856] Avg episode reward: [(0, '16.964')]
|
4539 |
+
[2025-08-05 05:35:24,476][34917] Updated weights for policy 0, policy_version 811 (0.0027)
|
4540 |
+
[2025-08-05 05:35:29,060][34856] Fps is (10 sec: 4096.0, 60 sec: 3822.9, 300 sec: 3748.9). Total num frames: 3338240. Throughput: 0: 945.0. Samples: 730024. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
|
4541 |
+
[2025-08-05 05:35:29,064][34856] Avg episode reward: [(0, '17.003')]
|
4542 |
+
[2025-08-05 05:35:34,060][34856] Fps is (10 sec: 3686.4, 60 sec: 3822.9, 300 sec: 3721.1). Total num frames: 3354624. Throughput: 0: 948.6. Samples: 734462. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
|
4543 |
+
[2025-08-05 05:35:34,061][34856] Avg episode reward: [(0, '18.032')]
|
4544 |
+
[2025-08-05 05:35:35,814][34917] Updated weights for policy 0, policy_version 821 (0.0025)
|
4545 |
+
[2025-08-05 05:35:39,060][34856] Fps is (10 sec: 3686.4, 60 sec: 3754.7, 300 sec: 3748.9). Total num frames: 3375104. Throughput: 0: 947.0. Samples: 740922. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
|
4546 |
+
[2025-08-05 05:35:39,061][34856] Avg episode reward: [(0, '18.232')]
|
4547 |
+
[2025-08-05 05:35:39,074][34895] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000824_3375104.pth...
|
4548 |
+
[2025-08-05 05:35:39,188][34895] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000606_2482176.pth
|
4549 |
+
[2025-08-05 05:35:44,060][34856] Fps is (10 sec: 3686.4, 60 sec: 3754.9, 300 sec: 3735.0). Total num frames: 3391488. Throughput: 0: 943.8. Samples: 744082. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
|
4550 |
+
[2025-08-05 05:35:44,068][34856] Avg episode reward: [(0, '18.908')]
|
4551 |
+
[2025-08-05 05:35:47,434][34917] Updated weights for policy 0, policy_version 831 (0.0020)
|
4552 |
+
[2025-08-05 05:35:49,060][34856] Fps is (10 sec: 3276.8, 60 sec: 3754.7, 300 sec: 3721.1). Total num frames: 3407872. Throughput: 0: 941.5. Samples: 748346. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
|
4553 |
+
[2025-08-05 05:35:49,061][34856] Avg episode reward: [(0, '19.665')]
|
4554 |
+
[2025-08-05 05:35:54,060][34856] Fps is (10 sec: 4096.0, 60 sec: 3822.9, 300 sec: 3748.9). Total num frames: 3432448. Throughput: 0: 941.7. Samples: 754898. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
|
4555 |
+
[2025-08-05 05:35:54,061][34856] Avg episode reward: [(0, '20.404')]
|
4556 |
+
[2025-08-05 05:35:56,788][34917] Updated weights for policy 0, policy_version 841 (0.0020)
|
4557 |
+
[2025-08-05 05:35:59,060][34856] Fps is (10 sec: 4096.0, 60 sec: 3754.7, 300 sec: 3735.0). Total num frames: 3448832. Throughput: 0: 942.7. Samples: 758232. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
|
4558 |
+
[2025-08-05 05:35:59,061][34856] Avg episode reward: [(0, '20.461')]
|
4559 |
+
[2025-08-05 05:36:04,060][34856] Fps is (10 sec: 3276.8, 60 sec: 3754.7, 300 sec: 3721.1). Total num frames: 3465216. Throughput: 0: 941.0. Samples: 762506. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
|
4560 |
+
[2025-08-05 05:36:04,061][34856] Avg episode reward: [(0, '21.303')]
|
4561 |
+
[2025-08-05 05:36:08,221][34917] Updated weights for policy 0, policy_version 851 (0.0027)
|
4562 |
+
[2025-08-05 05:36:09,060][34856] Fps is (10 sec: 3686.4, 60 sec: 3754.7, 300 sec: 3735.0). Total num frames: 3485696. Throughput: 0: 940.7. Samples: 769056. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
|
4563 |
+
[2025-08-05 05:36:09,061][34856] Avg episode reward: [(0, '21.065')]
|
4564 |
+
[2025-08-05 05:36:14,063][34856] Fps is (10 sec: 4094.9, 60 sec: 3754.5, 300 sec: 3748.8). Total num frames: 3506176. Throughput: 0: 940.2. Samples: 772336. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
|
4565 |
+
[2025-08-05 05:36:14,063][34856] Avg episode reward: [(0, '21.550')]
|
4566 |
+
[2025-08-05 05:36:19,060][34856] Fps is (10 sec: 3686.4, 60 sec: 3754.7, 300 sec: 3721.1). Total num frames: 3522560. Throughput: 0: 937.8. Samples: 776662. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0)
|
4567 |
+
[2025-08-05 05:36:19,061][34856] Avg episode reward: [(0, '21.597')]
|
4568 |
+
[2025-08-05 05:36:19,533][34917] Updated weights for policy 0, policy_version 861 (0.0018)
|
4569 |
+
[2025-08-05 05:36:24,060][34856] Fps is (10 sec: 3687.3, 60 sec: 3754.7, 300 sec: 3735.0). Total num frames: 3543040. Throughput: 0: 940.0. Samples: 783220. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
|
4570 |
+
[2025-08-05 05:36:24,061][34856] Avg episode reward: [(0, '21.824')]
|
4571 |
+
[2025-08-05 05:36:24,062][34895] Saving new best policy, reward=21.824!
|
4572 |
+
[2025-08-05 05:36:29,062][34856] Fps is (10 sec: 4095.3, 60 sec: 3754.6, 300 sec: 3748.9). Total num frames: 3563520. Throughput: 0: 942.8. Samples: 786510. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
|
4573 |
+
[2025-08-05 05:36:29,063][34856] Avg episode reward: [(0, '21.908')]
|
4574 |
+
[2025-08-05 05:36:29,071][34895] Saving new best policy, reward=21.908!
|
4575 |
+
[2025-08-05 05:36:30,641][34917] Updated weights for policy 0, policy_version 871 (0.0031)
|
4576 |
+
[2025-08-05 05:36:34,062][34856] Fps is (10 sec: 3685.9, 60 sec: 3754.6, 300 sec: 3735.0). Total num frames: 3579904. Throughput: 0: 944.7. Samples: 790860. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
|
4577 |
+
[2025-08-05 05:36:34,063][34856] Avg episode reward: [(0, '22.461')]
|
4578 |
+
[2025-08-05 05:36:34,063][34895] Saving new best policy, reward=22.461!
|
4579 |
+
[2025-08-05 05:36:39,060][34856] Fps is (10 sec: 3687.0, 60 sec: 3754.7, 300 sec: 3748.9). Total num frames: 3600384. Throughput: 0: 942.9. Samples: 797328. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
|
4580 |
+
[2025-08-05 05:36:39,061][34856] Avg episode reward: [(0, '20.885')]
|
4581 |
+
[2025-08-05 05:36:40,460][34917] Updated weights for policy 0, policy_version 881 (0.0036)
|
4582 |
+
[2025-08-05 05:36:44,060][34856] Fps is (10 sec: 3687.0, 60 sec: 3754.7, 300 sec: 3735.0). Total num frames: 3616768. Throughput: 0: 941.0. Samples: 800576. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0)
|
4583 |
+
[2025-08-05 05:36:44,061][34856] Avg episode reward: [(0, '21.274')]
|
4584 |
+
[2025-08-05 05:36:49,060][34856] Fps is (10 sec: 3276.8, 60 sec: 3754.7, 300 sec: 3721.1). Total num frames: 3633152. Throughput: 0: 942.4. Samples: 804912. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
|
4585 |
+
[2025-08-05 05:36:49,061][34856] Avg episode reward: [(0, '21.060')]
|
4586 |
+
[2025-08-05 05:36:52,044][34917] Updated weights for policy 0, policy_version 891 (0.0021)
|
4587 |
+
[2025-08-05 05:36:54,060][34856] Fps is (10 sec: 4096.0, 60 sec: 3754.7, 300 sec: 3748.9). Total num frames: 3657728. Throughput: 0: 942.5. Samples: 811470. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
|
4588 |
+
[2025-08-05 05:36:54,061][34856] Avg episode reward: [(0, '20.981')]
|
4589 |
+
[2025-08-05 05:36:59,064][34856] Fps is (10 sec: 4094.5, 60 sec: 3754.4, 300 sec: 3748.8). Total num frames: 3674112. Throughput: 0: 940.5. Samples: 814658. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
|
4590 |
+
[2025-08-05 05:36:59,065][34856] Avg episode reward: [(0, '20.663')]
|
4591 |
+
[2025-08-05 05:37:03,215][34917] Updated weights for policy 0, policy_version 901 (0.0026)
|
4592 |
+
[2025-08-05 05:37:04,060][34856] Fps is (10 sec: 3276.8, 60 sec: 3754.7, 300 sec: 3721.1). Total num frames: 3690496. Throughput: 0: 943.2. Samples: 819106. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0)
|
4593 |
+
[2025-08-05 05:37:04,061][34856] Avg episode reward: [(0, '21.206')]
|
4594 |
+
[2025-08-05 05:37:09,060][34856] Fps is (10 sec: 3278.0, 60 sec: 3686.4, 300 sec: 3735.0). Total num frames: 3706880. Throughput: 0: 909.5. Samples: 824148. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
|
4595 |
+
[2025-08-05 05:37:09,061][34856] Avg episode reward: [(0, '20.545')]
|
4596 |
+
[2025-08-05 05:37:14,060][34856] Fps is (10 sec: 3276.8, 60 sec: 3618.3, 300 sec: 3721.1). Total num frames: 3723264. Throughput: 0: 888.3. Samples: 826480. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0)
|
4597 |
+
[2025-08-05 05:37:14,061][34856] Avg episode reward: [(0, '20.033')]
|
4598 |
+
[2025-08-05 05:37:16,646][34917] Updated weights for policy 0, policy_version 911 (0.0022)
|
4599 |
+
[2025-08-05 05:37:19,060][34856] Fps is (10 sec: 3276.8, 60 sec: 3618.1, 300 sec: 3693.3). Total num frames: 3739648. Throughput: 0: 891.4. Samples: 830970. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
|
4600 |
+
[2025-08-05 05:37:19,061][34856] Avg episode reward: [(0, '20.089')]
|
4601 |
+
[2025-08-05 05:37:24,060][34856] Fps is (10 sec: 3686.4, 60 sec: 3618.1, 300 sec: 3707.2). Total num frames: 3760128. Throughput: 0: 891.5. Samples: 837444. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
|
4602 |
+
[2025-08-05 05:37:24,061][34856] Avg episode reward: [(0, '19.864')]
|
4603 |
+
[2025-08-05 05:37:26,584][34917] Updated weights for policy 0, policy_version 921 (0.0017)
|
4604 |
+
[2025-08-05 05:37:29,062][34856] Fps is (10 sec: 3685.6, 60 sec: 3549.8, 300 sec: 3707.2). Total num frames: 3776512. Throughput: 0: 886.3. Samples: 840460. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
|
4605 |
+
[2025-08-05 05:37:29,063][34856] Avg episode reward: [(0, '19.525')]
|
4606 |
+
[2025-08-05 05:37:34,060][34856] Fps is (10 sec: 3686.3, 60 sec: 3618.2, 300 sec: 3707.2). Total num frames: 3796992. Throughput: 0: 891.0. Samples: 845008. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
|
4607 |
+
[2025-08-05 05:37:34,061][34856] Avg episode reward: [(0, '19.571')]
|
4608 |
+
[2025-08-05 05:37:37,765][34917] Updated weights for policy 0, policy_version 931 (0.0032)
|
4609 |
+
[2025-08-05 05:37:39,061][34856] Fps is (10 sec: 4096.7, 60 sec: 3618.1, 300 sec: 3721.1). Total num frames: 3817472. Throughput: 0: 891.8. Samples: 851602. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
|
4610 |
+
[2025-08-05 05:37:39,061][34856] Avg episode reward: [(0, '19.453')]
|
4611 |
+
[2025-08-05 05:37:39,068][34895] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000932_3817472.pth...
|
4612 |
+
[2025-08-05 05:37:39,204][34895] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000713_2920448.pth
|
4613 |
+
[2025-08-05 05:37:44,060][34856] Fps is (10 sec: 3686.5, 60 sec: 3618.1, 300 sec: 3707.2). Total num frames: 3833856. Throughput: 0: 885.8. Samples: 854514. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
|
4614 |
+
[2025-08-05 05:37:44,061][34856] Avg episode reward: [(0, '20.490')]
|
4615 |
+
[2025-08-05 05:37:49,060][34856] Fps is (10 sec: 3276.9, 60 sec: 3618.1, 300 sec: 3693.3). Total num frames: 3850240. Throughput: 0: 890.0. Samples: 859156. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0)
|
4616 |
+
[2025-08-05 05:37:49,065][34856] Avg episode reward: [(0, '21.331')]
|
4617 |
+
[2025-08-05 05:37:49,168][34917] Updated weights for policy 0, policy_version 941 (0.0033)
|
4618 |
+
[2025-08-05 05:37:54,060][34856] Fps is (10 sec: 4096.0, 60 sec: 3618.1, 300 sec: 3735.0). Total num frames: 3874816. Throughput: 0: 921.2. Samples: 865600. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0)
|
4619 |
+
[2025-08-05 05:37:54,061][34856] Avg episode reward: [(0, '20.199')]
|
4620 |
+
[2025-08-05 05:37:59,060][34856] Fps is (10 sec: 4096.0, 60 sec: 3618.4, 300 sec: 3735.0). Total num frames: 3891200. Throughput: 0: 934.4. Samples: 868528. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0)
|
4621 |
+
[2025-08-05 05:37:59,061][34856] Avg episode reward: [(0, '20.495')]
|
4622 |
+
[2025-08-05 05:38:00,253][34917] Updated weights for policy 0, policy_version 951 (0.0028)
|
4623 |
+
[2025-08-05 05:38:04,060][34856] Fps is (10 sec: 3276.8, 60 sec: 3618.1, 300 sec: 3721.1). Total num frames: 3907584. Throughput: 0: 936.8. Samples: 873128. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
|
4624 |
+
[2025-08-05 05:38:04,061][34856] Avg episode reward: [(0, '20.971')]
|
4625 |
+
[2025-08-05 05:38:09,060][34856] Fps is (10 sec: 3686.4, 60 sec: 3686.4, 300 sec: 3735.1). Total num frames: 3928064. Throughput: 0: 938.1. Samples: 879660. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
|
4626 |
+
[2025-08-05 05:38:09,061][34856] Avg episode reward: [(0, '19.447')]
|
4627 |
+
[2025-08-05 05:38:10,165][34917] Updated weights for policy 0, policy_version 961 (0.0035)
|
4628 |
+
[2025-08-05 05:38:14,060][34856] Fps is (10 sec: 3686.4, 60 sec: 3686.4, 300 sec: 3735.0). Total num frames: 3944448. Throughput: 0: 935.8. Samples: 882570. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0)
|
4629 |
+
[2025-08-05 05:38:14,061][34856] Avg episode reward: [(0, '19.765')]
|
4630 |
+
[2025-08-05 05:38:19,060][34856] Fps is (10 sec: 3686.4, 60 sec: 3754.7, 300 sec: 3721.1). Total num frames: 3964928. Throughput: 0: 939.4. Samples: 887282. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0)
|
4631 |
+
[2025-08-05 05:38:19,061][34856] Avg episode reward: [(0, '21.100')]
|
4632 |
+
[2025-08-05 05:38:21,635][34917] Updated weights for policy 0, policy_version 971 (0.0035)
|
4633 |
+
[2025-08-05 05:38:24,060][34856] Fps is (10 sec: 4096.0, 60 sec: 3754.7, 300 sec: 3735.0). Total num frames: 3985408. Throughput: 0: 935.7. Samples: 893708. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0)
|
4634 |
+
[2025-08-05 05:38:24,061][34856] Avg episode reward: [(0, '19.919')]
|
4635 |
+
[2025-08-05 05:38:29,060][34856] Fps is (10 sec: 3686.4, 60 sec: 3754.8, 300 sec: 3735.0). Total num frames: 4001792. Throughput: 0: 936.2. Samples: 896642. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0)
|
4636 |
+
[2025-08-05 05:38:29,062][34856] Avg episode reward: [(0, '20.034')]
|
4637 |
+
[2025-08-05 05:38:29,788][34856] Component Batcher_0 stopped!
|
4638 |
+
[2025-08-05 05:38:29,781][34895] Stopping Batcher_0...
|
4639 |
+
[2025-08-05 05:38:29,784][34895] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000978_4005888.pth...
|
4640 |
+
[2025-08-05 05:38:29,789][34895] Loop batcher_evt_loop terminating...
|
4641 |
+
[2025-08-05 05:38:29,867][34917] Weights refcount: 2 0
|
4642 |
+
[2025-08-05 05:38:29,869][34856] Component InferenceWorker_p0-w0 stopped!
|
4643 |
+
[2025-08-05 05:38:29,869][34917] Stopping InferenceWorker_p0-w0...
|
4644 |
+
[2025-08-05 05:38:29,872][34917] Loop inference_proc0-0_evt_loop terminating...
|
4645 |
+
[2025-08-05 05:38:29,978][34895] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000824_3375104.pth
|
4646 |
+
[2025-08-05 05:38:29,990][34895] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000978_4005888.pth...
|
4647 |
+
[2025-08-05 05:38:30,221][34895] Stopping LearnerWorker_p0...
|
4648 |
+
[2025-08-05 05:38:30,222][34895] Loop learner_proc0_evt_loop terminating...
|
4649 |
+
[2025-08-05 05:38:30,224][34856] Component LearnerWorker_p0 stopped!
|
4650 |
+
[2025-08-05 05:38:30,413][34921] Stopping RolloutWorker_w2...
|
4651 |
+
[2025-08-05 05:38:30,413][34856] Component RolloutWorker_w2 stopped!
|
4652 |
+
[2025-08-05 05:38:30,414][34921] Loop rollout_proc2_evt_loop terminating...
|
4653 |
+
[2025-08-05 05:38:30,426][34856] Component RolloutWorker_w1 stopped!
|
4654 |
+
[2025-08-05 05:38:30,426][34923] Stopping RolloutWorker_w1...
|
4655 |
+
[2025-08-05 05:38:30,440][34856] Component RolloutWorker_w0 stopped!
|
4656 |
+
[2025-08-05 05:38:30,444][34920] Stopping RolloutWorker_w0...
|
4657 |
+
[2025-08-05 05:38:30,436][34923] Loop rollout_proc1_evt_loop terminating...
|
4658 |
+
[2025-08-05 05:38:30,452][34856] Component RolloutWorker_w4 stopped!
|
4659 |
+
[2025-08-05 05:38:30,452][34919] Stopping RolloutWorker_w4...
|
4660 |
+
[2025-08-05 05:38:30,453][34919] Loop rollout_proc4_evt_loop terminating...
|
4661 |
+
[2025-08-05 05:38:30,455][34856] Component RolloutWorker_w6 stopped!
|
4662 |
+
[2025-08-05 05:38:30,456][34926] Stopping RolloutWorker_w6...
|
4663 |
+
[2025-08-05 05:38:30,456][34926] Loop rollout_proc6_evt_loop terminating...
|
4664 |
+
[2025-08-05 05:38:30,469][34920] Loop rollout_proc0_evt_loop terminating...
|
4665 |
+
[2025-08-05 05:38:30,483][34856] Component RolloutWorker_w3 stopped!
|
4666 |
+
[2025-08-05 05:38:30,482][34922] Stopping RolloutWorker_w3...
|
4667 |
+
[2025-08-05 05:38:30,489][34922] Loop rollout_proc3_evt_loop terminating...
|
4668 |
+
[2025-08-05 05:38:30,511][34856] Component RolloutWorker_w5 stopped!
|
4669 |
+
[2025-08-05 05:38:30,511][34924] Stopping RolloutWorker_w5...
|
4670 |
+
[2025-08-05 05:38:30,523][34856] Component RolloutWorker_w7 stopped!
|
4671 |
+
[2025-08-05 05:38:30,523][34856] Waiting for process learner_proc0 to stop...
|
4672 |
+
[2025-08-05 05:38:30,513][34924] Loop rollout_proc5_evt_loop terminating...
|
4673 |
+
[2025-08-05 05:38:30,524][34925] Stopping RolloutWorker_w7...
|
4674 |
+
[2025-08-05 05:38:30,538][34925] Loop rollout_proc7_evt_loop terminating...
|
4675 |
+
[2025-08-05 05:38:32,020][34856] Waiting for process inference_proc0-0 to join...
|
4676 |
+
[2025-08-05 05:38:32,020][34856] Waiting for process rollout_proc0 to join...
|
4677 |
+
[2025-08-05 05:38:34,404][34856] Waiting for process rollout_proc1 to join...
|
4678 |
+
[2025-08-05 05:38:34,405][34856] Waiting for process rollout_proc2 to join...
|
4679 |
+
[2025-08-05 05:38:34,405][34856] Waiting for process rollout_proc3 to join...
|
4680 |
+
[2025-08-05 05:38:34,406][34856] Waiting for process rollout_proc4 to join...
|
4681 |
+
[2025-08-05 05:38:34,406][34856] Waiting for process rollout_proc5 to join...
|
4682 |
+
[2025-08-05 05:38:34,406][34856] Waiting for process rollout_proc6 to join...
|
4683 |
+
[2025-08-05 05:38:34,407][34856] Waiting for process rollout_proc7 to join...
|
4684 |
+
[2025-08-05 05:38:34,407][34856] Batcher 0 profile tree view:
|
4685 |
+
batching: 24.7330, releasing_batches: 0.0287
|
4686 |
+
[2025-08-05 05:38:34,407][34856] InferenceWorker_p0-w0 profile tree view:
|
4687 |
+
wait_policy: 0.0000
|
4688 |
+
wait_policy_total: 336.3687
|
4689 |
+
update_model: 8.4946
|
4690 |
+
weight_update: 0.0033
|
4691 |
+
one_step: 0.0238
|
4692 |
+
handle_policy_step: 591.7193
|
4693 |
+
deserialize: 14.0564, stack: 3.3023, obs_to_device_normalize: 122.4168, forward: 296.0190, send_messages: 32.8914
|
4694 |
+
prepare_outputs: 89.3863
|
4695 |
+
to_cpu: 53.1572
|
4696 |
+
[2025-08-05 05:38:34,408][34856] Learner 0 profile tree view:
|
4697 |
+
misc: 0.0051, prepare_batch: 11.7291
|
4698 |
+
train: 68.8930
|
4699 |
+
epoch_init: 0.0084, minibatch_init: 0.0082, losses_postprocess: 0.6004, kl_divergence: 0.6451, after_optimizer: 3.6092
|
4700 |
+
calculate_losses: 23.5806
|
4701 |
+
losses_init: 0.0107, forward_head: 1.2715, bptt_initial: 15.3763, tail: 1.2361, advantages_returns: 0.2735, losses: 3.3352
|
4702 |
+
bptt: 1.7456
|
4703 |
+
bptt_forward_core: 1.6625
|
4704 |
+
update: 39.7557
|
4705 |
+
clip: 0.9548
|
4706 |
+
[2025-08-05 05:38:34,408][34856] RolloutWorker_w0 profile tree view:
|
4707 |
+
wait_for_trajectories: 0.3045, enqueue_policy_requests: 80.5060, env_step: 769.3270, overhead: 13.2862, complete_rollouts: 7.0973
|
4708 |
+
save_policy_outputs: 28.4680
|
4709 |
+
split_output_tensors: 9.1205
|
4710 |
+
[2025-08-05 05:38:34,409][34856] RolloutWorker_w7 profile tree view:
|
4711 |
+
wait_for_trajectories: 0.3715, enqueue_policy_requests: 89.7619, env_step: 758.5926, overhead: 13.3971, complete_rollouts: 5.7492
|
4712 |
+
save_policy_outputs: 28.4995
|
4713 |
+
split_output_tensors: 9.1410
|
4714 |
+
[2025-08-05 05:38:34,410][34856] Loop Runner_EvtLoop terminating...
|
4715 |
+
[2025-08-05 05:38:34,410][34856] Runner profile tree view:
|
4716 |
+
main_loop: 1010.6880
|
4717 |
+
[2025-08-05 05:38:34,411][34856] Collected {0: 4005888}, FPS: 3554.2
|
4718 |
+
[2025-08-05 05:38:34,637][34856] Loading existing experiment configuration from /content/train_dir/default_experiment/config.json
|
4719 |
+
[2025-08-05 05:38:34,637][34856] Overriding arg 'num_workers' with value 1 passed from command line
|
4720 |
+
[2025-08-05 05:38:34,637][34856] Adding new argument 'no_render'=True that is not in the saved config file!
|
4721 |
+
[2025-08-05 05:38:34,637][34856] Adding new argument 'save_video'=True that is not in the saved config file!
|
4722 |
+
[2025-08-05 05:38:34,637][34856] Adding new argument 'video_frames'=1000000000.0 that is not in the saved config file!
|
4723 |
+
[2025-08-05 05:38:34,637][34856] Adding new argument 'video_name'=None that is not in the saved config file!
|
4724 |
+
[2025-08-05 05:38:34,638][34856] Adding new argument 'max_num_frames'=100000 that is not in the saved config file!
|
4725 |
+
[2025-08-05 05:38:34,638][34856] Adding new argument 'max_num_episodes'=10 that is not in the saved config file!
|
4726 |
+
[2025-08-05 05:38:34,638][34856] Adding new argument 'push_to_hub'=True that is not in the saved config file!
|
4727 |
+
[2025-08-05 05:38:34,638][34856] Adding new argument 'hf_repository'='naveen1divakar/rl_course_vizdoom_health_gathering_supreme' that is not in the saved config file!
|
4728 |
+
[2025-08-05 05:38:34,638][34856] Adding new argument 'policy_index'=0 that is not in the saved config file!
|
4729 |
+
[2025-08-05 05:38:34,639][34856] Adding new argument 'eval_deterministic'=False that is not in the saved config file!
|
4730 |
+
[2025-08-05 05:38:34,639][34856] Adding new argument 'train_script'=None that is not in the saved config file!
|
4731 |
+
[2025-08-05 05:38:34,639][34856] Adding new argument 'enjoy_script'=None that is not in the saved config file!
|
4732 |
+
[2025-08-05 05:38:34,639][34856] Using frameskip 1 and render_action_repeat=4 for evaluation
|
4733 |
+
[2025-08-05 05:38:34,666][34856] Doom resolution: 160x120, resize resolution: (128, 72)
|
4734 |
+
[2025-08-05 05:38:34,668][34856] RunningMeanStd input shape: (3, 72, 128)
|
4735 |
+
[2025-08-05 05:38:34,669][34856] RunningMeanStd input shape: (1,)
|
4736 |
+
[2025-08-05 05:38:34,685][34856] ConvEncoder: input_channels=3
|
4737 |
+
[2025-08-05 05:38:34,813][34856] Conv encoder output size: 512
|
4738 |
+
[2025-08-05 05:38:34,814][34856] Policy head output size: 512
|
4739 |
+
[2025-08-05 05:38:35,075][34856] Loading state from checkpoint /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000978_4005888.pth...
|
4740 |
+
[2025-08-05 05:38:35,850][34856] Num frames 100...
|
4741 |
+
[2025-08-05 05:38:35,978][34856] Num frames 200...
|
4742 |
+
[2025-08-05 05:38:36,102][34856] Num frames 300...
|
4743 |
+
[2025-08-05 05:38:36,234][34856] Num frames 400...
|
4744 |
+
[2025-08-05 05:38:36,363][34856] Num frames 500...
|
4745 |
+
[2025-08-05 05:38:36,491][34856] Num frames 600...
|
4746 |
+
[2025-08-05 05:38:36,616][34856] Num frames 700...
|
4747 |
+
[2025-08-05 05:38:36,749][34856] Num frames 800...
|
4748 |
+
[2025-08-05 05:38:36,895][34856] Num frames 900...
|
4749 |
+
[2025-08-05 05:38:37,022][34856] Num frames 1000...
|
4750 |
+
[2025-08-05 05:38:37,151][34856] Num frames 1100...
|
4751 |
+
[2025-08-05 05:38:37,278][34856] Num frames 1200...
|
4752 |
+
[2025-08-05 05:38:37,410][34856] Num frames 1300...
|
4753 |
+
[2025-08-05 05:38:37,537][34856] Num frames 1400...
|
4754 |
+
[2025-08-05 05:38:37,667][34856] Num frames 1500...
|
4755 |
+
[2025-08-05 05:38:37,726][34856] Avg episode rewards: #0: 37.040, true rewards: #0: 15.040
|
4756 |
+
[2025-08-05 05:38:37,727][34856] Avg episode reward: 37.040, avg true_objective: 15.040
|
4757 |
+
[2025-08-05 05:38:37,846][34856] Num frames 1600...
|
4758 |
+
[2025-08-05 05:38:37,989][34856] Num frames 1700...
|
4759 |
+
[2025-08-05 05:38:38,116][34856] Num frames 1800...
|
4760 |
+
[2025-08-05 05:38:38,239][34856] Num frames 1900...
|
4761 |
+
[2025-08-05 05:38:38,369][34856] Num frames 2000...
|
4762 |
+
[2025-08-05 05:38:38,435][34856] Avg episode rewards: #0: 23.545, true rewards: #0: 10.045
|
4763 |
+
[2025-08-05 05:38:38,435][34856] Avg episode reward: 23.545, avg true_objective: 10.045
|
4764 |
+
[2025-08-05 05:38:38,551][34856] Num frames 2100...
|
4765 |
+
[2025-08-05 05:38:38,674][34856] Num frames 2200...
|
4766 |
+
[2025-08-05 05:38:38,800][34856] Num frames 2300...
|
4767 |
+
[2025-08-05 05:38:38,936][34856] Num frames 2400...
|
4768 |
+
[2025-08-05 05:38:39,067][34856] Num frames 2500...
|
4769 |
+
[2025-08-05 05:38:39,203][34856] Num frames 2600...
|
4770 |
+
[2025-08-05 05:38:39,279][34856] Avg episode rewards: #0: 18.723, true rewards: #0: 8.723
|
4771 |
+
[2025-08-05 05:38:39,279][34856] Avg episode reward: 18.723, avg true_objective: 8.723
|
4772 |
+
[2025-08-05 05:38:39,383][34856] Num frames 2700...
|
4773 |
+
[2025-08-05 05:38:39,517][34856] Num frames 2800...
|
4774 |
+
[2025-08-05 05:38:39,644][34856] Num frames 2900...
|
4775 |
+
[2025-08-05 05:38:39,769][34856] Num frames 3000...
|
4776 |
+
[2025-08-05 05:38:39,898][34856] Num frames 3100...
|
4777 |
+
[2025-08-05 05:38:40,036][34856] Num frames 3200...
|
4778 |
+
[2025-08-05 05:38:40,161][34856] Num frames 3300...
|
4779 |
+
[2025-08-05 05:38:40,288][34856] Num frames 3400...
|
4780 |
+
[2025-08-05 05:38:40,411][34856] Num frames 3500...
|
4781 |
+
[2025-08-05 05:38:40,538][34856] Num frames 3600...
|
4782 |
+
[2025-08-05 05:38:40,667][34856] Num frames 3700...
|
4783 |
+
[2025-08-05 05:38:40,810][34856] Num frames 3800...
|
4784 |
+
[2025-08-05 05:38:40,999][34856] Num frames 3900...
|
4785 |
+
[2025-08-05 05:38:41,116][34856] Avg episode rewards: #0: 21.330, true rewards: #0: 9.830
|
4786 |
+
[2025-08-05 05:38:41,116][34856] Avg episode reward: 21.330, avg true_objective: 9.830
|
4787 |
+
[2025-08-05 05:38:41,231][34856] Num frames 4000...
|
4788 |
+
[2025-08-05 05:38:41,403][34856] Num frames 4100...
|
4789 |
+
[2025-08-05 05:38:41,574][34856] Num frames 4200...
|
4790 |
+
[2025-08-05 05:38:41,754][34856] Num frames 4300...
|
4791 |
+
[2025-08-05 05:38:41,925][34856] Num frames 4400...
|
4792 |
+
[2025-08-05 05:38:42,109][34856] Num frames 4500...
|
4793 |
+
[2025-08-05 05:38:42,287][34856] Num frames 4600...
|
4794 |
+
[2025-08-05 05:38:42,496][34856] Num frames 4700...
|
4795 |
+
[2025-08-05 05:38:42,675][34856] Num frames 4800...
|
4796 |
+
[2025-08-05 05:38:42,863][34856] Num frames 4900...
|
4797 |
+
[2025-08-05 05:38:43,051][34856] Num frames 5000...
|
4798 |
+
[2025-08-05 05:38:43,195][34856] Num frames 5100...
|
4799 |
+
[2025-08-05 05:38:43,324][34856] Num frames 5200...
|
4800 |
+
[2025-08-05 05:38:43,454][34856] Num frames 5300...
|
4801 |
+
[2025-08-05 05:38:43,579][34856] Num frames 5400...
|
4802 |
+
[2025-08-05 05:38:43,712][34856] Num frames 5500...
|
4803 |
+
[2025-08-05 05:38:43,845][34856] Avg episode rewards: #0: 24.928, true rewards: #0: 11.128
|
4804 |
+
[2025-08-05 05:38:43,846][34856] Avg episode reward: 24.928, avg true_objective: 11.128
|
4805 |
+
[2025-08-05 05:38:43,892][34856] Num frames 5600...
|
4806 |
+
[2025-08-05 05:38:44,015][34856] Num frames 5700...
|
4807 |
+
[2025-08-05 05:38:44,144][34856] Num frames 5800...
|
4808 |
+
[2025-08-05 05:38:44,275][34856] Num frames 5900...
|
4809 |
+
[2025-08-05 05:38:44,406][34856] Num frames 6000...
|
4810 |
+
[2025-08-05 05:38:44,516][34856] Avg episode rewards: #0: 22.073, true rewards: #0: 10.073
|
4811 |
+
[2025-08-05 05:38:44,516][34856] Avg episode reward: 22.073, avg true_objective: 10.073
|
4812 |
+
[2025-08-05 05:38:44,587][34856] Num frames 6100...
|
4813 |
+
[2025-08-05 05:38:44,716][34856] Num frames 6200...
|
4814 |
+
[2025-08-05 05:38:44,844][34856] Num frames 6300...
|
4815 |
+
[2025-08-05 05:38:44,968][34856] Num frames 6400...
|
4816 |
+
[2025-08-05 05:38:45,091][34856] Num frames 6500...
|
4817 |
+
[2025-08-05 05:38:45,233][34856] Num frames 6600...
|
4818 |
+
[2025-08-05 05:38:45,357][34856] Num frames 6700...
|
4819 |
+
[2025-08-05 05:38:45,483][34856] Num frames 6800...
|
4820 |
+
[2025-08-05 05:38:45,608][34856] Num frames 6900...
|
4821 |
+
[2025-08-05 05:38:45,740][34856] Num frames 7000...
|
4822 |
+
[2025-08-05 05:38:45,838][34856] Avg episode rewards: #0: 21.909, true rewards: #0: 10.051
|
4823 |
+
[2025-08-05 05:38:45,839][34856] Avg episode reward: 21.909, avg true_objective: 10.051
|
4824 |
+
[2025-08-05 05:38:45,922][34856] Num frames 7100...
|
4825 |
+
[2025-08-05 05:38:46,050][34856] Num frames 7200...
|
4826 |
+
[2025-08-05 05:38:46,179][34856] Num frames 7300...
|
4827 |
+
[2025-08-05 05:38:46,315][34856] Num frames 7400...
|
4828 |
+
[2025-08-05 05:38:46,442][34856] Num frames 7500...
|
4829 |
+
[2025-08-05 05:38:46,569][34856] Num frames 7600...
|
4830 |
+
[2025-08-05 05:38:46,695][34856] Num frames 7700...
|
4831 |
+
[2025-08-05 05:38:46,836][34856] Num frames 7800...
|
4832 |
+
[2025-08-05 05:38:46,964][34856] Num frames 7900...
|
4833 |
+
[2025-08-05 05:38:47,088][34856] Num frames 8000...
|
4834 |
+
[2025-08-05 05:38:47,213][34856] Num frames 8100...
|
4835 |
+
[2025-08-05 05:38:47,352][34856] Num frames 8200...
|
4836 |
+
[2025-08-05 05:38:47,482][34856] Num frames 8300...
|
4837 |
+
[2025-08-05 05:38:47,610][34856] Num frames 8400...
|
4838 |
+
[2025-08-05 05:38:47,737][34856] Num frames 8500...
|
4839 |
+
[2025-08-05 05:38:47,860][34856] Num frames 8600...
|
4840 |
+
[2025-08-05 05:38:47,990][34856] Num frames 8700...
|
4841 |
+
[2025-08-05 05:38:48,117][34856] Num frames 8800...
|
4842 |
+
[2025-08-05 05:38:48,242][34856] Num frames 8900...
|
4843 |
+
[2025-08-05 05:38:48,413][34856] Num frames 9000...
|
4844 |
+
[2025-08-05 05:38:48,547][34856] Num frames 9100...
|
4845 |
+
[2025-08-05 05:38:48,645][34856] Avg episode rewards: #0: 25.920, true rewards: #0: 11.420
|
4846 |
+
[2025-08-05 05:38:48,645][34856] Avg episode reward: 25.920, avg true_objective: 11.420
|
4847 |
+
[2025-08-05 05:38:48,722][34856] Num frames 9200...
|
4848 |
+
[2025-08-05 05:38:48,845][34856] Num frames 9300...
|
4849 |
+
[2025-08-05 05:38:48,967][34856] Num frames 9400...
|
4850 |
+
[2025-08-05 05:38:49,087][34856] Num frames 9500...
|
4851 |
+
[2025-08-05 05:38:49,222][34856] Num frames 9600...
|
4852 |
+
[2025-08-05 05:38:49,358][34856] Num frames 9700...
|
4853 |
+
[2025-08-05 05:38:49,494][34856] Num frames 9800...
|
4854 |
+
[2025-08-05 05:38:49,619][34856] Num frames 9900...
|
4855 |
+
[2025-08-05 05:38:49,747][34856] Num frames 10000...
|
4856 |
+
[2025-08-05 05:38:49,873][34856] Num frames 10100...
|
4857 |
+
[2025-08-05 05:38:49,998][34856] Num frames 10200...
|
4858 |
+
[2025-08-05 05:38:50,125][34856] Num frames 10300...
|
4859 |
+
[2025-08-05 05:38:50,252][34856] Num frames 10400...
|
4860 |
+
[2025-08-05 05:38:50,390][34856] Num frames 10500...
|
4861 |
+
[2025-08-05 05:38:50,517][34856] Num frames 10600...
|
4862 |
+
[2025-08-05 05:38:50,650][34856] Num frames 10700...
|
4863 |
+
[2025-08-05 05:38:50,778][34856] Num frames 10800...
|
4864 |
+
[2025-08-05 05:38:50,871][34856] Avg episode rewards: #0: 27.369, true rewards: #0: 12.036
|
4865 |
+
[2025-08-05 05:38:50,871][34856] Avg episode reward: 27.369, avg true_objective: 12.036
|
4866 |
+
[2025-08-05 05:38:50,961][34856] Num frames 10900...
|
4867 |
+
[2025-08-05 05:38:51,087][34856] Num frames 11000...
|
4868 |
+
[2025-08-05 05:38:51,214][34856] Num frames 11100...
|
4869 |
+
[2025-08-05 05:38:51,346][34856] Num frames 11200...
|
4870 |
+
[2025-08-05 05:38:51,471][34856] Avg episode rewards: #0: 25.248, true rewards: #0: 11.248
|
4871 |
+
[2025-08-05 05:38:51,471][34856] Avg episode reward: 25.248, avg true_objective: 11.248
|
4872 |
+
[2025-08-05 05:39:57,493][34856] Replay video saved to /content/train_dir/default_experiment/replay.mp4!
|