lokeessshhhh committed
Commit 709db59 · verified · 1 Parent(s): 114a9c7

Upload folder using huggingface_hub
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+replay.mp4 filter=lfs diff=lfs merge=lfs -text
.summary/0/events.out.tfevents.1752406344.7ee6365361bc ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dc98f0422f716f0bebd12e0b5ff32b15f4cdc828223e68941422af2f924a08d1
+size 71424
.summary/0/events.out.tfevents.1752406515.7ee6365361bc ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cb5cf132b02e682e1d7a043080a6d13bcd473a9ba34ccca634d40f8168e1571d
+size 68936
README.md ADDED
@@ -0,0 +1,56 @@
+---
+library_name: sample-factory
+tags:
+- deep-reinforcement-learning
+- reinforcement-learning
+- sample-factory
+model-index:
+- name: APPO
+  results:
+  - task:
+      type: reinforcement-learning
+      name: reinforcement-learning
+    dataset:
+      name: doom_health_gathering_supreme
+      type: doom_health_gathering_supreme
+    metrics:
+    - type: mean_reward
+      value: 3.52 +/- 0.55
+      name: mean_reward
+      verified: false
+---
+
+An **APPO** model trained on the **doom_health_gathering_supreme** environment.
+
+This model was trained using Sample-Factory 2.0: https://github.com/alex-petrenko/sample-factory.
+Documentation for how to use Sample-Factory can be found at https://www.samplefactory.dev/
+
+
+## Downloading the model
+
+After installing Sample-Factory, download the model with:
+```
+python -m sample_factory.huggingface.load_from_hub -r lokeessshhhh/rl_vizdoom_health_gathering
+```
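+
+If you want the downloaded files under the `./train_dir` used by the commands below, the Sample-Factory Hugging Face docs also describe a `-d` flag that sets the download directory (a sketch; check the flag against your installed version):
+```
+python -m sample_factory.huggingface.load_from_hub -r lokeessshhhh/rl_vizdoom_health_gathering -d ./train_dir
+```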
+
+
+## Using the model
+
+To run the model after download, use the `enjoy` script corresponding to this environment:
+```
+python -m <path.to.enjoy.module> --algo=APPO --env=doom_health_gathering_supreme --train_dir=./train_dir --experiment=rl_vizdoom_health_gathering
+```
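+
+For ViZDoom environments, the enjoy module shipped with the Sample-Factory examples is `sf_examples.vizdoom.enjoy_vizdoom` (an assumption based on the upstream repository layout; substitute your own module if you registered the environment elsewhere), so a concrete invocation would be:
+```
+python -m sf_examples.vizdoom.enjoy_vizdoom --algo=APPO --env=doom_health_gathering_supreme --train_dir=./train_dir --experiment=rl_vizdoom_health_gathering
+```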
+
+
+You can also upload models to the Hugging Face Hub using the same script with the `--push_to_hub` flag.
+See https://www.samplefactory.dev/10-huggingface/huggingface/ for more details.
+
+
+## Training with this model
+
+To continue training with this model, use the `train` script corresponding to this environment:
+```
+python -m <path.to.train.module> --algo=APPO --env=doom_health_gathering_supreme --train_dir=./train_dir --experiment=rl_vizdoom_health_gathering --restart_behavior=resume --train_for_env_steps=10000000000
+```
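+
+Likewise, for ViZDoom the example train module is `sf_examples.vizdoom.train_vizdoom` (again an assumption based on the upstream repository layout):
+```
+python -m sf_examples.vizdoom.train_vizdoom --algo=APPO --env=doom_health_gathering_supreme --train_dir=./train_dir --experiment=rl_vizdoom_health_gathering --restart_behavior=resume --train_for_env_steps=10000000000
+```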
+
+Note: you may need to set `--train_for_env_steps` to a suitably high number, because the experiment resumes from the step count at which it previously stopped.
+
checkpoint_p0/best_000000041_167936_reward_4.766.pth ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ee735185d5c0abf6417575a494c0d262bb25bd74e88ccf4502ed03c8f98c612f
+size 34929051
checkpoint_p0/checkpoint_000000051_208896.pth ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:945044c2189c3dcb74e02f1fe84203a3a416e8ffd3c9fc3fedb9ce2ed37d7087
+size 34929477
checkpoint_p0/checkpoint_000000053_217088.pth ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cf9cde09fbf8c2bfb97d48a33ad37407e78cd2b884828bd0954bf6f46f6fa5e5
+size 34929477
config.json ADDED
@@ -0,0 +1,143 @@
+{
+  "help": false,
+  "algo": "APPO",
+  "env": "doom_health_gathering_supreme",
+  "experiment": "vizdoom_exp",
+  "train_dir": "./train_dir",
+  "restart_behavior": "resume",
+  "device": "gpu",
+  "seed": null,
+  "num_policies": 1,
+  "async_rl": true,
+  "serial_mode": false,
+  "batched_sampling": false,
+  "num_batches_to_accumulate": 2,
+  "worker_num_splits": 2,
+  "policy_workers_per_policy": 1,
+  "max_policy_lag": 1000,
+  "num_workers": 2,
+  "num_envs_per_worker": 2,
+  "batch_size": 1024,
+  "num_batches_per_epoch": 1,
+  "num_epochs": 1,
+  "rollout": 32,
+  "recurrence": 32,
+  "shuffle_minibatches": false,
+  "gamma": 0.99,
+  "reward_scale": 1.0,
+  "reward_clip": 1000.0,
+  "value_bootstrap": false,
+  "normalize_returns": true,
+  "exploration_loss_coeff": 0.001,
+  "value_loss_coeff": 0.5,
+  "kl_loss_coeff": 0.0,
+  "exploration_loss": "symmetric_kl",
+  "gae_lambda": 0.95,
+  "ppo_clip_ratio": 0.1,
+  "ppo_clip_value": 0.2,
+  "with_vtrace": false,
+  "vtrace_rho": 1.0,
+  "vtrace_c": 1.0,
+  "optimizer": "adam",
+  "adam_eps": 1e-06,
+  "adam_beta1": 0.9,
+  "adam_beta2": 0.999,
+  "max_grad_norm": 4.0,
+  "learning_rate": 0.0001,
+  "lr_schedule": "constant",
+  "lr_schedule_kl_threshold": 0.008,
+  "lr_adaptive_min": 1e-06,
+  "lr_adaptive_max": 0.01,
+  "obs_subtract_mean": 0.0,
+  "obs_scale": 255.0,
+  "normalize_input": true,
+  "normalize_input_keys": null,
+  "decorrelate_experience_max_seconds": 0,
+  "decorrelate_envs_on_one_worker": true,
+  "actor_worker_gpus": [],
+  "set_workers_cpu_affinity": true,
+  "force_envs_single_thread": false,
+  "default_niceness": 0,
+  "log_to_file": true,
+  "experiment_summaries_interval": 10,
+  "flush_summaries_interval": 30,
+  "stats_avg": 100,
+  "summaries_use_frameskip": true,
+  "heartbeat_interval": 20,
+  "heartbeat_reporting_interval": 600,
+  "train_for_env_steps": 200000,
+  "train_for_seconds": 10000000000,
+  "save_every_sec": 120,
+  "keep_checkpoints": 2,
+  "load_checkpoint_kind": "latest",
+  "save_milestones_sec": -1,
+  "save_best_every_sec": 5,
+  "save_best_metric": "reward",
+  "save_best_after": 100000,
+  "benchmark": false,
+  "encoder_mlp_layers": [
+    512,
+    512
+  ],
+  "encoder_conv_architecture": "convnet_simple",
+  "encoder_conv_mlp_layers": [
+    512
+  ],
+  "use_rnn": true,
+  "rnn_size": 512,
+  "rnn_type": "gru",
+  "rnn_num_layers": 1,
+  "decoder_mlp_layers": [],
+  "nonlinearity": "elu",
+  "policy_initialization": "orthogonal",
+  "policy_init_gain": 1.0,
+  "actor_critic_share_weights": true,
+  "adaptive_stddev": true,
+  "continuous_tanh_scale": 0.0,
+  "initial_stddev": 1.0,
+  "use_env_info_cache": false,
+  "env_gpu_actions": false,
+  "env_gpu_observations": true,
+  "env_frameskip": 4,
+  "env_framestack": 1,
+  "pixel_format": "CHW",
+  "use_record_episode_statistics": false,
+  "with_wandb": false,
+  "wandb_user": null,
+  "wandb_project": "sample_factory",
+  "wandb_group": null,
+  "wandb_job_type": "SF",
+  "wandb_tags": [],
+  "with_pbt": false,
+  "pbt_mix_policies_in_one_env": true,
+  "pbt_period_env_steps": 5000000,
+  "pbt_start_mutation": 20000000,
+  "pbt_replace_fraction": 0.3,
+  "pbt_mutation_rate": 0.15,
+  "pbt_replace_reward_gap": 0.1,
+  "pbt_replace_reward_gap_absolute": 1e-06,
+  "pbt_optimize_gamma": false,
+  "pbt_target_objective": "true_objective",
+  "pbt_perturb_min": 1.1,
+  "pbt_perturb_max": 1.5,
+  "num_agents": -1,
+  "num_humans": 0,
+  "num_bots": -1,
+  "start_bot_difficulty": null,
+  "timelimit": null,
+  "res_w": 128,
+  "res_h": 72,
+  "wide_aspect_ratio": false,
+  "eval_env_frameskip": 1,
+  "fps": 35,
+  "command_line": "--env=doom_health_gathering_supreme --experiment=vizdoom_exp --train_dir=./train_dir --num_workers=2 --num_envs_per_worker=2",
+  "cli_args": {
+    "env": "doom_health_gathering_supreme",
+    "experiment": "vizdoom_exp",
+    "train_dir": "./train_dir",
+    "num_workers": 2,
+    "num_envs_per_worker": 2
+  },
+  "git_hash": "unknown",
+  "git_repo_name": "not a git repository"
+}
replay.mp4 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c012dd4e71bbaffeb8d176b43f81991a69b62e99cb3e61c650b87c5c07cda2ba
+size 2306568
sf_log.txt ADDED
@@ -0,0 +1,695 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [2025-07-13 11:32:27,039][02343] Saving configuration to ./train_dir/vizdoom_exp/config.json...
2
+ [2025-07-13 11:32:27,041][02343] Rollout worker 0 uses device cpu
3
+ [2025-07-13 11:32:27,042][02343] Rollout worker 1 uses device cpu
4
+ [2025-07-13 11:32:27,189][02343] Using GPUs [0] for process 0 (actually maps to GPUs [0])
5
+ [2025-07-13 11:32:27,190][02343] InferenceWorker_p0-w0: min num requests: 1
6
+ [2025-07-13 11:32:27,200][02343] Starting all processes...
7
+ [2025-07-13 11:32:27,201][02343] Starting process learner_proc0
8
+ [2025-07-13 11:32:27,262][02343] Starting all processes...
9
+ [2025-07-13 11:32:27,268][02343] Starting process inference_proc0-0
10
+ [2025-07-13 11:32:27,269][02343] Starting process rollout_proc0
11
+ [2025-07-13 11:32:27,269][02343] Starting process rollout_proc1
12
+ [2025-07-13 11:32:33,042][06118] Worker 1 uses CPU cores [1]
13
+ [2025-07-13 11:32:33,235][06119] Using GPUs [0] for process 0 (actually maps to GPUs [0])
14
+ [2025-07-13 11:32:33,235][06119] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for inference process 0
15
+ [2025-07-13 11:32:33,255][06119] Num visible devices: 1
16
+ [2025-07-13 11:32:33,310][06117] Worker 0 uses CPU cores [0]
17
+ [2025-07-13 11:32:33,348][06110] Using GPUs [0] for process 0 (actually maps to GPUs [0])
18
+ [2025-07-13 11:32:33,348][06110] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for learning process 0
19
+ [2025-07-13 11:32:33,365][06110] Num visible devices: 1
20
+ [2025-07-13 11:32:33,365][06110] Starting seed is not provided
21
+ [2025-07-13 11:32:33,366][06110] Using GPUs [0] for process 0 (actually maps to GPUs [0])
22
+ [2025-07-13 11:32:33,366][06110] Initializing actor-critic model on device cuda:0
23
+ [2025-07-13 11:32:33,366][06110] RunningMeanStd input shape: (3, 72, 128)
24
+ [2025-07-13 11:32:33,369][06110] RunningMeanStd input shape: (1,)
25
+ [2025-07-13 11:32:33,380][06110] ConvEncoder: input_channels=3
26
+ [2025-07-13 11:32:33,653][06110] Conv encoder output size: 512
27
+ [2025-07-13 11:32:33,653][06110] Policy head output size: 512
28
+ [2025-07-13 11:32:33,708][06110] Created Actor Critic model with architecture:
29
+ [2025-07-13 11:32:33,709][06110] ActorCriticSharedWeights(
30
+ (obs_normalizer): ObservationNormalizer(
31
+ (running_mean_std): RunningMeanStdDictInPlace(
32
+ (running_mean_std): ModuleDict(
33
+ (obs): RunningMeanStdInPlace()
34
+ )
35
+ )
36
+ )
37
+ (returns_normalizer): RecursiveScriptModule(original_name=RunningMeanStdInPlace)
38
+ (encoder): VizdoomEncoder(
39
+ (basic_encoder): ConvEncoder(
40
+ (enc): RecursiveScriptModule(
41
+ original_name=ConvEncoderImpl
42
+ (conv_head): RecursiveScriptModule(
43
+ original_name=Sequential
44
+ (0): RecursiveScriptModule(original_name=Conv2d)
45
+ (1): RecursiveScriptModule(original_name=ELU)
46
+ (2): RecursiveScriptModule(original_name=Conv2d)
47
+ (3): RecursiveScriptModule(original_name=ELU)
48
+ (4): RecursiveScriptModule(original_name=Conv2d)
49
+ (5): RecursiveScriptModule(original_name=ELU)
50
+ )
51
+ (mlp_layers): RecursiveScriptModule(
52
+ original_name=Sequential
53
+ (0): RecursiveScriptModule(original_name=Linear)
54
+ (1): RecursiveScriptModule(original_name=ELU)
55
+ )
56
+ )
57
+ )
58
+ )
59
+ (core): ModelCoreRNN(
60
+ (core): GRU(512, 512)
61
+ )
62
+ (decoder): MlpDecoder(
63
+ (mlp): Identity()
64
+ )
65
+ (critic_linear): Linear(in_features=512, out_features=1, bias=True)
66
+ (action_parameterization): ActionParameterizationDefault(
67
+ (distribution_linear): Linear(in_features=512, out_features=5, bias=True)
68
+ )
69
+ )
70
+ [2025-07-13 11:32:33,948][06110] Using optimizer <class 'torch.optim.adam.Adam'>
71
+ [2025-07-13 11:32:38,823][06110] No checkpoints found
72
+ [2025-07-13 11:32:38,823][06110] Did not load from checkpoint, starting from scratch!
73
+ [2025-07-13 11:32:38,823][06110] Initialized policy 0 weights for model version 0
74
+ [2025-07-13 11:32:38,826][06110] LearnerWorker_p0 finished initialization!
75
+ [2025-07-13 11:32:38,827][06110] Using GPUs [0] for process 0 (actually maps to GPUs [0])
76
+ [2025-07-13 11:32:38,965][06119] RunningMeanStd input shape: (3, 72, 128)
77
+ [2025-07-13 11:32:38,966][06119] RunningMeanStd input shape: (1,)
78
+ [2025-07-13 11:32:38,977][06119] ConvEncoder: input_channels=3
79
+ [2025-07-13 11:32:39,078][06119] Conv encoder output size: 512
80
+ [2025-07-13 11:32:39,079][06119] Policy head output size: 512
81
+ [2025-07-13 11:32:39,115][02343] Inference worker 0-0 is ready!
82
+ [2025-07-13 11:32:39,117][02343] All inference workers are ready! Signal rollout workers to start!
83
+ [2025-07-13 11:32:39,167][06118] Doom resolution: 160x120, resize resolution: (128, 72)
84
+ [2025-07-13 11:32:39,177][06117] Doom resolution: 160x120, resize resolution: (128, 72)
85
+ [2025-07-13 11:32:39,251][02343] Fps is (10 sec: nan, 60 sec: nan, 300 sec: nan). Total num frames: 0. Throughput: 0: nan. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
86
+ [2025-07-13 11:32:39,562][06118] Decorrelating experience for 0 frames...
87
+ [2025-07-13 11:32:39,581][06117] Decorrelating experience for 0 frames...
88
+ [2025-07-13 11:32:40,000][06118] Decorrelating experience for 32 frames...
89
+ [2025-07-13 11:32:40,013][06117] Decorrelating experience for 32 frames...
90
+ [2025-07-13 11:32:44,253][02343] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 138.2. Samples: 691. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
91
+ [2025-07-13 11:32:44,254][02343] Avg episode reward: [(0, '4.091')]
92
+ [2025-07-13 11:32:47,181][02343] Heartbeat connected on Batcher_0
93
+ [2025-07-13 11:32:47,185][02343] Heartbeat connected on LearnerWorker_p0
94
+ [2025-07-13 11:32:47,194][02343] Heartbeat connected on InferenceWorker_p0-w0
95
+ [2025-07-13 11:32:47,199][02343] Heartbeat connected on RolloutWorker_w0
96
+ [2025-07-13 11:32:47,202][02343] Heartbeat connected on RolloutWorker_w1
97
+ [2025-07-13 11:32:49,255][02343] Fps is (10 sec: 1228.3, 60 sec: 1228.3, 300 sec: 1228.3). Total num frames: 12288. Throughput: 0: 305.8. Samples: 3059. Policy #0 lag: (min: 0.0, avg: 0.1, max: 1.0)
98
+ [2025-07-13 11:32:49,257][02343] Avg episode reward: [(0, '4.554')]
99
+ [2025-07-13 11:32:54,251][02343] Fps is (10 sec: 2048.3, 60 sec: 1365.3, 300 sec: 1365.3). Total num frames: 20480. Throughput: 0: 395.1. Samples: 5927. Policy #0 lag: (min: 0.0, avg: 0.0, max: 1.0)
100
+ [2025-07-13 11:32:54,256][02343] Avg episode reward: [(0, '4.525')]
101
+ [2025-07-13 11:32:59,251][02343] Fps is (10 sec: 2048.8, 60 sec: 1638.4, 300 sec: 1638.4). Total num frames: 32768. Throughput: 0: 377.1. Samples: 7543. Policy #0 lag: (min: 0.0, avg: 0.0, max: 1.0)
102
+ [2025-07-13 11:32:59,255][02343] Avg episode reward: [(0, '4.484')]
103
+ [2025-07-13 11:33:02,707][06119] Updated weights for policy 0, policy_version 10 (0.0020)
104
+ [2025-07-13 11:33:04,252][02343] Fps is (10 sec: 2048.0, 60 sec: 1638.4, 300 sec: 1638.4). Total num frames: 40960. Throughput: 0: 418.6. Samples: 10465. Policy #0 lag: (min: 0.0, avg: 0.1, max: 1.0)
105
+ [2025-07-13 11:33:04,256][02343] Avg episode reward: [(0, '4.518')]
106
+ [2025-07-13 11:33:09,251][02343] Fps is (10 sec: 2048.0, 60 sec: 1774.9, 300 sec: 1774.9). Total num frames: 53248. Throughput: 0: 458.9. Samples: 13767. Policy #0 lag: (min: 0.0, avg: 0.1, max: 1.0)
107
+ [2025-07-13 11:33:09,255][02343] Avg episode reward: [(0, '4.456')]
108
+ [2025-07-13 11:33:14,251][02343] Fps is (10 sec: 2457.6, 60 sec: 1872.5, 300 sec: 1872.5). Total num frames: 65536. Throughput: 0: 442.7. Samples: 15493. Policy #0 lag: (min: 0.0, avg: 0.0, max: 1.0)
109
+ [2025-07-13 11:33:14,254][02343] Avg episode reward: [(0, '4.435')]
110
+ [2025-07-13 11:33:19,251][02343] Fps is (10 sec: 2048.0, 60 sec: 1843.2, 300 sec: 1843.2). Total num frames: 73728. Throughput: 0: 456.6. Samples: 18262. Policy #0 lag: (min: 0.0, avg: 0.0, max: 1.0)
111
+ [2025-07-13 11:33:19,255][02343] Avg episode reward: [(0, '4.358')]
112
+ [2025-07-13 11:33:22,039][06119] Updated weights for policy 0, policy_version 20 (0.0013)
113
+ [2025-07-13 11:33:24,251][02343] Fps is (10 sec: 2048.0, 60 sec: 1911.5, 300 sec: 1911.5). Total num frames: 86016. Throughput: 0: 482.6. Samples: 21718. Policy #0 lag: (min: 0.0, avg: 0.0, max: 1.0)
114
+ [2025-07-13 11:33:24,254][02343] Avg episode reward: [(0, '4.320')]
115
+ [2025-07-13 11:33:29,254][02343] Fps is (10 sec: 2047.9, 60 sec: 1884.1, 300 sec: 1884.1). Total num frames: 94208. Throughput: 0: 505.8. Samples: 23452. Policy #0 lag: (min: 0.0, avg: 0.0, max: 1.0)
116
+ [2025-07-13 11:33:29,255][02343] Avg episode reward: [(0, '4.360')]
117
+ [2025-07-13 11:33:34,251][02343] Fps is (10 sec: 2048.0, 60 sec: 1936.3, 300 sec: 1936.3). Total num frames: 106496. Throughput: 0: 513.3. Samples: 26154. Policy #0 lag: (min: 0.0, avg: 0.1, max: 1.0)
118
+ [2025-07-13 11:33:34,256][02343] Avg episode reward: [(0, '4.347')]
119
+ [2025-07-13 11:33:34,262][06110] Saving new best policy, reward=4.347!
120
+ [2025-07-13 11:33:39,251][02343] Fps is (10 sec: 2048.1, 60 sec: 1911.5, 300 sec: 1911.5). Total num frames: 114688. Throughput: 0: 525.0. Samples: 29551. Policy #0 lag: (min: 0.0, avg: 0.0, max: 1.0)
121
+ [2025-07-13 11:33:39,253][02343] Avg episode reward: [(0, '4.298')]
122
+ [2025-07-13 11:33:41,944][06119] Updated weights for policy 0, policy_version 30 (0.0014)
123
+ [2025-07-13 11:33:44,251][02343] Fps is (10 sec: 1638.4, 60 sec: 2048.0, 300 sec: 1890.5). Total num frames: 122880. Throughput: 0: 516.9. Samples: 30803. Policy #0 lag: (min: 0.0, avg: 0.1, max: 1.0)
124
+ [2025-07-13 11:33:44,255][02343] Avg episode reward: [(0, '4.267')]
125
+ [2025-07-13 11:33:49,251][02343] Fps is (10 sec: 2048.0, 60 sec: 2048.1, 300 sec: 1931.0). Total num frames: 135168. Throughput: 0: 522.6. Samples: 33984. Policy #0 lag: (min: 0.0, avg: 0.0, max: 1.0)
126
+ [2025-07-13 11:33:49,255][02343] Avg episode reward: [(0, '4.283')]
127
+ [2025-07-13 11:33:54,253][02343] Fps is (10 sec: 2457.2, 60 sec: 2116.2, 300 sec: 1966.0). Total num frames: 147456. Throughput: 0: 523.3. Samples: 37317. Policy #0 lag: (min: 0.0, avg: 0.0, max: 1.0)
128
+ [2025-07-13 11:33:54,254][02343] Avg episode reward: [(0, '4.434')]
129
+ [2025-07-13 11:33:54,261][06110] Saving new best policy, reward=4.434!
130
+ [2025-07-13 11:33:59,251][02343] Fps is (10 sec: 2048.0, 60 sec: 2048.0, 300 sec: 1945.6). Total num frames: 155648. Throughput: 0: 509.5. Samples: 38420. Policy #0 lag: (min: 0.0, avg: 0.0, max: 1.0)
131
+ [2025-07-13 11:33:59,258][02343] Avg episode reward: [(0, '4.585')]
132
+ [2025-07-13 11:33:59,264][06110] Saving new best policy, reward=4.585!
133
+ [2025-07-13 11:34:01,670][06119] Updated weights for policy 0, policy_version 40 (0.0013)
134
+ [2025-07-13 11:34:04,251][02343] Fps is (10 sec: 2048.3, 60 sec: 2116.3, 300 sec: 1975.7). Total num frames: 167936. Throughput: 0: 525.0. Samples: 41885. Policy #0 lag: (min: 0.0, avg: 0.1, max: 1.0)
135
+ [2025-07-13 11:34:04,255][02343] Avg episode reward: [(0, '4.766')]
136
+ [2025-07-13 11:34:04,263][06110] Saving new best policy, reward=4.766!
137
+ [2025-07-13 11:34:09,259][02343] Fps is (10 sec: 2046.4, 60 sec: 2047.7, 300 sec: 1956.8). Total num frames: 176128. Throughput: 0: 510.4. Samples: 44692. Policy #0 lag: (min: 0.0, avg: 0.0, max: 1.0)
138
+ [2025-07-13 11:34:09,261][02343] Avg episode reward: [(0, '4.719')]
139
+ [2025-07-13 11:34:14,251][02343] Fps is (10 sec: 2048.0, 60 sec: 2048.0, 300 sec: 1983.3). Total num frames: 188416. Throughput: 0: 507.6. Samples: 46295. Policy #0 lag: (min: 0.0, avg: 0.1, max: 1.0)
140
+ [2025-07-13 11:34:14,255][02343] Avg episode reward: [(0, '4.561')]
141
+ [2025-07-13 11:34:19,251][02343] Fps is (10 sec: 2459.5, 60 sec: 2116.3, 300 sec: 2007.0). Total num frames: 200704. Throughput: 0: 525.2. Samples: 49790. Policy #0 lag: (min: 0.0, avg: 0.0, max: 1.0)
142
+ [2025-07-13 11:34:19,255][02343] Avg episode reward: [(0, '4.330')]
143
+ [2025-07-13 11:34:21,466][06119] Updated weights for policy 0, policy_version 50 (0.0017)
144
+ [2025-07-13 11:34:24,257][02343] Fps is (10 sec: 2046.8, 60 sec: 2047.8, 300 sec: 1989.4). Total num frames: 208896. Throughput: 0: 509.2. Samples: 52469. Policy #0 lag: (min: 0.0, avg: 0.1, max: 1.0)
145
+ [2025-07-13 11:34:24,262][02343] Avg episode reward: [(0, '4.333')]
146
+ [2025-07-13 11:34:24,270][06110] Saving ./train_dir/vizdoom_exp/checkpoint_p0/checkpoint_000000051_208896.pth...
147
+ [2025-07-13 11:34:28,853][02343] Keyboard interrupt detected in the event loop EvtLoop [Runner_EvtLoop, process=main process 2343], exiting...
148
+ [2025-07-13 11:34:28,857][06110] Stopping Batcher_0...
149
+ [2025-07-13 11:34:28,857][06110] Loop batcher_evt_loop terminating...
150
+ [2025-07-13 11:34:28,859][06110] Saving ./train_dir/vizdoom_exp/checkpoint_p0/checkpoint_000000053_217088.pth...
151
+ [2025-07-13 11:34:28,899][06118] EvtLoop [rollout_proc1_evt_loop, process=rollout_proc1] unhandled exception in slot='advance_rollouts' connected to emitter=Emitter(object_id='InferenceWorker_p0-w0', signal_name='advance1'), args=(1, 0)
152
+ Traceback (most recent call last):
153
+ File "/usr/local/lib/python3.11/dist-packages/signal_slot/signal_slot.py", line 355, in _process_signal
154
+ slot_callable(*args)
155
+ File "/usr/local/lib/python3.11/dist-packages/sample_factory/algo/sampling/rollout_worker.py", line 241, in advance_rollouts
156
+ complete_rollouts, episodic_stats = runner.advance_rollouts(policy_id, self.timing)
157
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
158
+ File "/usr/local/lib/python3.11/dist-packages/sample_factory/algo/sampling/non_batched_sampling.py", line 634, in advance_rollouts
159
+ new_obs, rewards, terminated, truncated, infos = e.step(actions)
160
+ ^^^^^^^^^^^^^^^
161
+ File "/usr/local/lib/python3.11/dist-packages/gymnasium/core.py", line 461, in step
162
+ return self.env.step(action)
163
+ ^^^^^^^^^^^^^^^^^^^^^
164
+ File "/usr/local/lib/python3.11/dist-packages/sample_factory/algo/utils/make_env.py", line 129, in step
165
+ obs, rew, terminated, truncated, info = self.env.step(action)
166
+ ^^^^^^^^^^^^^^^^^^^^^
167
+ File "/usr/local/lib/python3.11/dist-packages/sample_factory/algo/utils/make_env.py", line 115, in step
168
+ obs, rew, terminated, truncated, info = self.env.step(action)
169
+ ^^^^^^^^^^^^^^^^^^^^^
170
+ File "/usr/local/lib/python3.11/dist-packages/sf_examples/vizdoom/doom/wrappers/scenario_wrappers/gathering_reward_shaping.py", line 33, in step
171
+ observation, reward, terminated, truncated, info = self.env.step(action)
172
+ ^^^^^^^^^^^^^^^^^^^^^
173
+ File "/usr/local/lib/python3.11/dist-packages/gymnasium/core.py", line 522, in step
174
+ observation, reward, terminated, truncated, info = self.env.step(action)
175
+ ^^^^^^^^^^^^^^^^^^^^^
176
+ File "/usr/local/lib/python3.11/dist-packages/sample_factory/envs/env_wrappers.py", line 86, in step
177
+ obs, reward, terminated, truncated, info = self.env.step(action)
178
+ ^^^^^^^^^^^^^^^^^^^^^
179
+ File "/usr/local/lib/python3.11/dist-packages/gymnasium/core.py", line 461, in step
180
+ return self.env.step(action)
181
+ ^^^^^^^^^^^^^^^^^^^^^
182
+ File "/usr/local/lib/python3.11/dist-packages/sf_examples/vizdoom/doom/wrappers/multiplayer_stats.py", line 54, in step
183
+ obs, reward, terminated, truncated, info = self.env.step(action)
184
+ ^^^^^^^^^^^^^^^^^^^^^
185
+ File "/usr/local/lib/python3.11/dist-packages/sf_examples/vizdoom/doom/doom_gym.py", line 452, in step
186
+ reward = self.game.make_action(actions_flattened, self.skip_frames)
187
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
188
+ vizdoom.vizdoom.SignalException: Signal SIGINT received. ViZDoom instance has been closed.
189
+ [2025-07-13 11:34:28,912][06118] Unhandled exception Signal SIGINT received. ViZDoom instance has been closed. in evt loop rollout_proc1_evt_loop
190
+ [2025-07-13 11:34:28,945][06119] Weights refcount: 2 0
191
+ [2025-07-13 11:34:28,949][06119] Stopping InferenceWorker_p0-w0...
192
+ [2025-07-13 11:34:28,949][06119] Loop inference_proc0-0_evt_loop terminating...
193
+ [2025-07-13 11:34:29,010][06110] Stopping LearnerWorker_p0...
194
+ [2025-07-13 11:34:29,011][06110] Loop learner_proc0_evt_loop terminating...
195
+ [2025-07-13 11:35:24,325][06939] Saving configuration to ./train_dir/vizdoom_exp/config.json...
196
+ [2025-07-13 11:35:24,327][06939] Rollout worker 0 uses device cpu
197
+ [2025-07-13 11:35:24,328][06939] Rollout worker 1 uses device cpu
198
+ [2025-07-13 11:35:24,454][06939] Using GPUs [0] for process 0 (actually maps to GPUs [0])
199
+ [2025-07-13 11:35:24,455][06939] InferenceWorker_p0-w0: min num requests: 1
200
+ [2025-07-13 11:35:24,467][06939] Starting all processes...
201
+ [2025-07-13 11:35:24,468][06939] Starting process learner_proc0
202
+ [2025-07-13 11:35:24,769][06939] Starting all processes...
203
+ [2025-07-13 11:35:24,783][06939] Starting process inference_proc0-0
204
+ [2025-07-13 11:35:24,784][06939] Starting process rollout_proc0
205
+ [2025-07-13 11:35:24,784][06939] Starting process rollout_proc1
206
+ [2025-07-13 11:35:30,789][07359] Using GPUs [0] for process 0 (actually maps to GPUs [0])
207
+ [2025-07-13 11:35:30,794][07359] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for learning process 0
208
+ [2025-07-13 11:35:30,857][07359] Num visible devices: 1
209
+ [2025-07-13 11:35:30,861][07359] Starting seed is not provided
210
+ [2025-07-13 11:35:30,861][07359] Using GPUs [0] for process 0 (actually maps to GPUs [0])
211
+ [2025-07-13 11:35:30,862][07359] Initializing actor-critic model on device cuda:0
212
+ [2025-07-13 11:35:30,863][07359] RunningMeanStd input shape: (3, 72, 128)
213
+ [2025-07-13 11:35:30,866][07359] RunningMeanStd input shape: (1,)
214
+ [2025-07-13 11:35:30,937][07359] ConvEncoder: input_channels=3
215
+ [2025-07-13 11:35:31,320][07368] Worker 1 uses CPU cores [1]
216
+ [2025-07-13 11:35:31,517][07359] Conv encoder output size: 512
217
+ [2025-07-13 11:35:31,517][07359] Policy head output size: 512
218
+ [2025-07-13 11:35:31,549][07359] Created Actor Critic model with architecture:
219
+ [2025-07-13 11:35:31,550][07359] ActorCriticSharedWeights(
220
+ (obs_normalizer): ObservationNormalizer(
221
+ (running_mean_std): RunningMeanStdDictInPlace(
222
+ (running_mean_std): ModuleDict(
223
+ (obs): RunningMeanStdInPlace()
224
+ )
225
+ )
226
+ )
227
+ (returns_normalizer): RecursiveScriptModule(original_name=RunningMeanStdInPlace)
228
+ (encoder): VizdoomEncoder(
229
+ (basic_encoder): ConvEncoder(
230
+ (enc): RecursiveScriptModule(
231
+ original_name=ConvEncoderImpl
232
+ (conv_head): RecursiveScriptModule(
233
+ original_name=Sequential
234
+ (0): RecursiveScriptModule(original_name=Conv2d)
235
+ (1): RecursiveScriptModule(original_name=ELU)
236
+ (2): RecursiveScriptModule(original_name=Conv2d)
237
+ (3): RecursiveScriptModule(original_name=ELU)
238
+ (4): RecursiveScriptModule(original_name=Conv2d)
239
+ (5): RecursiveScriptModule(original_name=ELU)
240
+ )
241
+ (mlp_layers): RecursiveScriptModule(
242
+ original_name=Sequential
243
+ (0): RecursiveScriptModule(original_name=Linear)
244
+ (1): RecursiveScriptModule(original_name=ELU)
245
+ )
246
+ )
247
+ )
248
+ )
249
+ (core): ModelCoreRNN(
250
+ (core): GRU(512, 512)
251
+ )
252
+ (decoder): MlpDecoder(
253
+ (mlp): Identity()
254
+ )
255
+ (critic_linear): Linear(in_features=512, out_features=1, bias=True)
256
+ (action_parameterization): ActionParameterizationDefault(
257
+ (distribution_linear): Linear(in_features=512, out_features=5, bias=True)
258
+ )
259
+ )
260
+ [2025-07-13 11:35:31,593][07367] Using GPUs [0] for process 0 (actually maps to GPUs [0])
261
+ [2025-07-13 11:35:31,594][07367] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for inference process 0
262
+ [2025-07-13 11:35:31,634][07367] Num visible devices: 1
263
+ [2025-07-13 11:35:31,636][07369] Worker 0 uses CPU cores [0]
264
+ [2025-07-13 11:35:31,742][07359] Using optimizer <class 'torch.optim.adam.Adam'>
265
+ [2025-07-13 11:35:33,126][07359] Loading state from checkpoint ./train_dir/vizdoom_exp/checkpoint_p0/checkpoint_000000053_217088.pth...
266
+ [2025-07-13 11:35:33,127][07359] Could not load from checkpoint, attempt 0
267
+ Traceback (most recent call last):
268
+ File "/usr/local/lib/python3.11/dist-packages/sample_factory/algo/learning/learner.py", line 281, in load_checkpoint
269
+ checkpoint_dict = torch.load(latest_checkpoint, map_location=device)
270
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
271
+ File "/usr/local/lib/python3.11/dist-packages/torch/serialization.py", line 1470, in load
272
+ raise pickle.UnpicklingError(_get_wo_message(str(e))) from None
273
+ _pickle.UnpicklingError: Weights only load failed. This file can still be loaded, to do so you have two options, do those steps only if you trust the source of the checkpoint.
274
+ (1) In PyTorch 2.6, we changed the default value of the `weights_only` argument in `torch.load` from `False` to `True`. Re-running `torch.load` with `weights_only` set to `False` will likely succeed, but it can result in arbitrary code execution. Do it only if you got the file from a trusted source.
275
+ (2) Alternatively, to load with `weights_only=True` please check the recommended steps in the following error message.
276
+ WeightsUnpickler error: Unsupported global: GLOBAL numpy.core.multiarray.scalar was not an allowed global by default. Please use `torch.serialization.add_safe_globals([scalar])` or the `torch.serialization.safe_globals([scalar])` context manager to allowlist this global if you trust this class/function.
277
+
278
+ Check the documentation of torch.load to learn more about types accepted by default with weights_only https://pytorch.org/docs/stable/generated/torch.load.html.
279
+ [2025-07-13 11:35:33,130][07359] Loading state from checkpoint ./train_dir/vizdoom_exp/checkpoint_p0/checkpoint_000000053_217088.pth...
280
+ [2025-07-13 11:35:33,131][07359] Could not load from checkpoint, attempt 1
281
+ Traceback (most recent call last):
282
+ File "/usr/local/lib/python3.11/dist-packages/sample_factory/algo/learning/learner.py", line 281, in load_checkpoint
283
+ checkpoint_dict = torch.load(latest_checkpoint, map_location=device)
284
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
285
+ File "/usr/local/lib/python3.11/dist-packages/torch/serialization.py", line 1470, in load
286
+ raise pickle.UnpicklingError(_get_wo_message(str(e))) from None
287
+ _pickle.UnpicklingError: Weights only load failed. This file can still be loaded, to do so you have two options, do those steps only if you trust the source of the checkpoint.
288
+ (1) In PyTorch 2.6, we changed the default value of the `weights_only` argument in `torch.load` from `False` to `True`. Re-running `torch.load` with `weights_only` set to `False` will likely succeed, but it can result in arbitrary code execution. Do it only if you got the file from a trusted source.
289
+ (2) Alternatively, to load with `weights_only=True` please check the recommended steps in the following error message.
290
+ WeightsUnpickler error: Unsupported global: GLOBAL numpy.core.multiarray.scalar was not an allowed global by default. Please use `torch.serialization.add_safe_globals([scalar])` or the `torch.serialization.safe_globals([scalar])` context manager to allowlist this global if you trust this class/function.
291
+
292
+ Check the documentation of torch.load to learn more about types accepted by default with weights_only https://pytorch.org/docs/stable/generated/torch.load.html.
293
+ [2025-07-13 11:35:33,132][07359] Loading state from checkpoint ./train_dir/vizdoom_exp/checkpoint_p0/checkpoint_000000053_217088.pth...
294
+ [2025-07-13 11:35:33,132][07359] Could not load from checkpoint, attempt 2
295
+ Traceback (most recent call last):
296
+ File "/usr/local/lib/python3.11/dist-packages/sample_factory/algo/learning/learner.py", line 281, in load_checkpoint
297
+ checkpoint_dict = torch.load(latest_checkpoint, map_location=device)
298
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
299
+ File "/usr/local/lib/python3.11/dist-packages/torch/serialization.py", line 1470, in load
300
+ raise pickle.UnpicklingError(_get_wo_message(str(e))) from None
301
+ _pickle.UnpicklingError: Weights only load failed. This file can still be loaded, to do so you have two options, do those steps only if you trust the source of the checkpoint.
302
+ (1) In PyTorch 2.6, we changed the default value of the `weights_only` argument in `torch.load` from `False` to `True`. Re-running `torch.load` with `weights_only` set to `False` will likely succeed, but it can result in arbitrary code execution. Do it only if you got the file from a trusted source.
303
+ (2) Alternatively, to load with `weights_only=True` please check the recommended steps in the following error message.
304
+ WeightsUnpickler error: Unsupported global: GLOBAL numpy.core.multiarray.scalar was not an allowed global by default. Please use `torch.serialization.add_safe_globals([scalar])` or the `torch.serialization.safe_globals([scalar])` context manager to allowlist this global if you trust this class/function.
305
+
306
+ Check the documentation of torch.load to learn more about types accepted by default with weights_only https://pytorch.org/docs/stable/generated/torch.load.html.
307
+ [2025-07-13 11:35:33,133][07359] Did not load from checkpoint, starting from scratch!
308
+ [2025-07-13 11:35:33,133][07359] Initialized policy 0 weights for model version 0
309
+ [2025-07-13 11:35:33,143][07359] LearnerWorker_p0 finished initialization!
310
+ [2025-07-13 11:35:33,143][07359] Using GPUs [0] for process 0 (actually maps to GPUs [0])
311
+ [2025-07-13 11:35:33,241][07367] RunningMeanStd input shape: (3, 72, 128)
312
+ [2025-07-13 11:35:33,243][07367] RunningMeanStd input shape: (1,)
313
+ [2025-07-13 11:35:33,253][07367] ConvEncoder: input_channels=3
314
+ [2025-07-13 11:35:33,370][07367] Conv encoder output size: 512
315
+ [2025-07-13 11:35:33,370][07367] Policy head output size: 512
316
+ [2025-07-13 11:35:33,403][06939] Inference worker 0-0 is ready!
317
+ [2025-07-13 11:35:33,404][06939] All inference workers are ready! Signal rollout workers to start!
318
+ [2025-07-13 11:35:33,457][07368] Doom resolution: 160x120, resize resolution: (128, 72)
319
+ [2025-07-13 11:35:33,456][07369] Doom resolution: 160x120, resize resolution: (128, 72)
320
+ [2025-07-13 11:35:33,844][07369] Decorrelating experience for 0 frames...
321
+ [2025-07-13 11:35:33,851][07368] Decorrelating experience for 0 frames...
322
+ [2025-07-13 11:35:34,276][07369] Decorrelating experience for 32 frames...
323
+ [2025-07-13 11:35:34,286][07368] Decorrelating experience for 32 frames...
324
+ [2025-07-13 11:35:35,802][06939] Fps is (10 sec: nan, 60 sec: nan, 300 sec: nan). Total num frames: 0. Throughput: 0: nan. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
325
+ [2025-07-13 11:35:35,811][06939] Avg episode reward: [(0, '3.216')]
326
+ [2025-07-13 11:35:40,802][06939] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 273.6. Samples: 1368. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
327
+ [2025-07-13 11:35:40,807][06939] Avg episode reward: [(0, '4.118')]
328
+ [2025-07-13 11:35:44,442][06939] Heartbeat connected on Batcher_0
329
+ [2025-07-13 11:35:44,446][06939] Heartbeat connected on LearnerWorker_p0
330
+ [2025-07-13 11:35:44,456][06939] Heartbeat connected on InferenceWorker_p0-w0
331
+ [2025-07-13 11:35:44,462][06939] Heartbeat connected on RolloutWorker_w0
332
+ [2025-07-13 11:35:44,473][06939] Heartbeat connected on RolloutWorker_w1
333
+ [2025-07-13 11:35:45,802][06939] Fps is (10 sec: 1638.4, 60 sec: 1638.4, 300 sec: 1638.4). Total num frames: 16384. Throughput: 0: 425.2. Samples: 4252. Policy #0 lag: (min: 0.0, avg: 0.1, max: 1.0)
334
+ [2025-07-13 11:35:45,803][06939] Avg episode reward: [(0, '4.094')]
335
+ [2025-07-13 11:35:50,802][06939] Fps is (10 sec: 2457.7, 60 sec: 1638.4, 300 sec: 1638.4). Total num frames: 24576. Throughput: 0: 400.2. Samples: 6003. Policy #0 lag: (min: 0.0, avg: 0.1, max: 1.0)
336
+ [2025-07-13 11:35:50,803][06939] Avg episode reward: [(0, '4.254')]
337
+ [2025-07-13 11:35:55,802][06939] Fps is (10 sec: 2048.0, 60 sec: 1843.2, 300 sec: 1843.2). Total num frames: 36864. Throughput: 0: 435.6. Samples: 8712. Policy #0 lag: (min: 0.0, avg: 0.1, max: 1.0)
338
+ [2025-07-13 11:35:55,806][06939] Avg episode reward: [(0, '4.370')]
339
+ [2025-07-13 11:35:57,194][07367] Updated weights for policy 0, policy_version 10 (0.0023)
340
+ [2025-07-13 11:36:00,802][06939] Fps is (10 sec: 2457.6, 60 sec: 1966.1, 300 sec: 1966.1). Total num frames: 49152. Throughput: 0: 488.8. Samples: 12220. Policy #0 lag: (min: 0.0, avg: 0.1, max: 1.0)
341
+ [2025-07-13 11:36:00,805][06939] Avg episode reward: [(0, '4.427')]
342
+ [2025-07-13 11:36:05,802][06939] Fps is (10 sec: 2048.0, 60 sec: 1911.5, 300 sec: 1911.5). Total num frames: 57344. Throughput: 0: 451.5. Samples: 13545. Policy #0 lag: (min: 0.0, avg: 0.1, max: 1.0)
343
+ [2025-07-13 11:36:05,803][06939] Avg episode reward: [(0, '4.483')]
344
+ [2025-07-13 11:36:10,802][06939] Fps is (10 sec: 1638.4, 60 sec: 1872.5, 300 sec: 1872.5). Total num frames: 65536. Throughput: 0: 475.7. Samples: 16650. Policy #0 lag: (min: 0.0, avg: 0.0, max: 1.0)
345
+ [2025-07-13 11:36:10,806][06939] Avg episode reward: [(0, '4.508')]
346
+ [2025-07-13 11:36:15,804][06939] Fps is (10 sec: 2047.6, 60 sec: 1945.5, 300 sec: 1945.5). Total num frames: 77824. Throughput: 0: 490.3. Samples: 19613. Policy #0 lag: (min: 0.0, avg: 0.0, max: 1.0)
347
+ [2025-07-13 11:36:15,807][06939] Avg episode reward: [(0, '4.467')]
348
+ [2025-07-13 11:36:18,157][07367] Updated weights for policy 0, policy_version 20 (0.0020)
349
+ [2025-07-13 11:36:20,802][06939] Fps is (10 sec: 2048.0, 60 sec: 1911.5, 300 sec: 1911.5). Total num frames: 86016. Throughput: 0: 459.3. Samples: 20670. Policy #0 lag: (min: 0.0, avg: 0.1, max: 1.0)
350
+ [2025-07-13 11:36:20,805][06939] Avg episode reward: [(0, '4.500')]
351
+ [2025-07-13 11:36:25,802][06939] Fps is (10 sec: 2048.5, 60 sec: 1966.1, 300 sec: 1966.1). Total num frames: 98304. Throughput: 0: 505.8. Samples: 24129. Policy #0 lag: (min: 0.0, avg: 0.1, max: 1.0)
352
+ [2025-07-13 11:36:25,805][06939] Avg episode reward: [(0, '4.592')]
353
+ [2025-07-13 11:36:30,806][06939] Fps is (10 sec: 2047.1, 60 sec: 1936.1, 300 sec: 1936.1). Total num frames: 106496. Throughput: 0: 505.5. Samples: 27002. Policy #0 lag: (min: 0.0, avg: 0.0, max: 1.0)
354
+ [2025-07-13 11:36:30,810][06939] Avg episode reward: [(0, '4.454')]
355
+ [2025-07-13 11:36:30,813][07359] Saving new best policy, reward=4.454!
356
+ [2025-07-13 11:36:35,802][06939] Fps is (10 sec: 2048.0, 60 sec: 1979.7, 300 sec: 1979.7). Total num frames: 118784. Throughput: 0: 499.9. Samples: 28498. Policy #0 lag: (min: 0.0, avg: 0.1, max: 1.0)
357
+ [2025-07-13 11:36:35,803][06939] Avg episode reward: [(0, '4.456')]
358
+ [2025-07-13 11:36:35,814][07359] Saving new best policy, reward=4.456!
359
+ [2025-07-13 11:36:37,443][07367] Updated weights for policy 0, policy_version 30 (0.0014)
360
+ [2025-07-13 11:36:40,802][06939] Fps is (10 sec: 2048.9, 60 sec: 2116.3, 300 sec: 1953.5). Total num frames: 126976. Throughput: 0: 515.4. Samples: 31905. Policy #0 lag: (min: 0.0, avg: 0.0, max: 1.0)
361
+ [2025-07-13 11:36:40,809][06939] Avg episode reward: [(0, '4.306')]
362
+ [2025-07-13 11:36:45,802][06939] Fps is (10 sec: 2048.0, 60 sec: 2048.0, 300 sec: 1989.5). Total num frames: 139264. Throughput: 0: 498.6. Samples: 34659. Policy #0 lag: (min: 0.0, avg: 0.1, max: 1.0)
363
+ [2025-07-13 11:36:45,805][06939] Avg episode reward: [(0, '4.352')]
364
+ [2025-07-13 11:36:50,803][06939] Fps is (10 sec: 2047.7, 60 sec: 2047.9, 300 sec: 1966.0). Total num frames: 147456. Throughput: 0: 507.2. Samples: 36371. Policy #0 lag: (min: 0.0, avg: 0.0, max: 1.0)
365
+ [2025-07-13 11:36:50,808][06939] Avg episode reward: [(0, '4.362')]
366
+ [2025-07-13 11:36:55,802][06939] Fps is (10 sec: 1638.3, 60 sec: 1979.7, 300 sec: 1945.6). Total num frames: 155648. Throughput: 0: 507.1. Samples: 39470. Policy #0 lag: (min: 0.0, avg: 0.1, max: 1.0)
367
+ [2025-07-13 11:36:55,810][06939] Avg episode reward: [(0, '4.446')]
368
+ [2025-07-13 11:36:58,610][07367] Updated weights for policy 0, policy_version 40 (0.0018)
369
+ [2025-07-13 11:37:00,802][06939] Fps is (10 sec: 2048.4, 60 sec: 1979.7, 300 sec: 1975.7). Total num frames: 167936. Throughput: 0: 498.2. Samples: 42031. Policy #0 lag: (min: 0.0, avg: 0.1, max: 1.0)
370
+ [2025-07-13 11:37:00,804][06939] Avg episode reward: [(0, '4.559')]
371
+ [2025-07-13 11:37:00,808][07359] Saving new best policy, reward=4.559!
372
+ [2025-07-13 11:37:05,805][06939] Fps is (10 sec: 2047.5, 60 sec: 1979.6, 300 sec: 1956.9). Total num frames: 176128. Throughput: 0: 511.3. Samples: 43682. Policy #0 lag: (min: 0.0, avg: 0.1, max: 1.0)
373
+ [2025-07-13 11:37:05,809][06939] Avg episode reward: [(0, '4.462')]
374
+ [2025-07-13 11:37:10,802][06939] Fps is (10 sec: 1638.4, 60 sec: 1979.7, 300 sec: 1940.2). Total num frames: 184320. Throughput: 0: 498.0. Samples: 46541. Policy #0 lag: (min: 0.0, avg: 0.0, max: 1.0)
375
+ [2025-07-13 11:37:10,806][06939] Avg episode reward: [(0, '4.346')]
376
+ [2025-07-13 11:37:15,802][06939] Fps is (10 sec: 2048.6, 60 sec: 1979.8, 300 sec: 1966.1). Total num frames: 196608. Throughput: 0: 504.4. Samples: 49699. Policy #0 lag: (min: 0.0, avg: 0.0, max: 1.0)
377
+ [2025-07-13 11:37:15,805][06939] Avg episode reward: [(0, '4.272')]
378
+ [2025-07-13 11:37:15,814][07359] Saving ./train_dir/vizdoom_exp/checkpoint_p0/checkpoint_000000048_196608.pth...
379
+ [2025-07-13 11:37:15,891][07359] Removing ./train_dir/vizdoom_exp/checkpoint_p0/checkpoint_000000048_196608.pth
380
+ [2025-07-13 11:37:18,217][07359] Saving ./train_dir/vizdoom_exp/checkpoint_p0/checkpoint_000000050_204800.pth...
381
+ [2025-07-13 11:37:18,218][07359] Stopping Batcher_0...
382
+ [2025-07-13 11:37:18,225][07359] Loop batcher_evt_loop terminating...
383
+ [2025-07-13 11:37:18,223][06939] Component Batcher_0 stopped!
384
+ [2025-07-13 11:37:18,232][07367] Updated weights for policy 0, policy_version 50 (0.0014)
385
+ [2025-07-13 11:37:18,266][07367] Weights refcount: 2 0
386
+ [2025-07-13 11:37:18,268][06939] Component InferenceWorker_p0-w0 stopped!
387
+ [2025-07-13 11:37:18,271][07367] Stopping InferenceWorker_p0-w0...
388
+ [2025-07-13 11:37:18,273][07367] Loop inference_proc0-0_evt_loop terminating...
389
+ [2025-07-13 11:37:18,298][07359] Removing ./train_dir/vizdoom_exp/checkpoint_p0/checkpoint_000000050_204800.pth
390
+ [2025-07-13 11:37:18,307][07359] Saving ./train_dir/vizdoom_exp/checkpoint_p0/checkpoint_000000050_204800.pth...
391
+ [2025-07-13 11:37:18,368][06939] Component RolloutWorker_w1 stopped!
392
+ [2025-07-13 11:37:18,371][07368] Stopping RolloutWorker_w1...
393
+ [2025-07-13 11:37:18,374][07368] Loop rollout_proc1_evt_loop terminating...
394
+ [2025-07-13 11:37:18,387][07359] Removing ./train_dir/vizdoom_exp/checkpoint_p0/checkpoint_000000050_204800.pth
395
+ [2025-07-13 11:37:18,396][07359] Stopping LearnerWorker_p0...
396
+ [2025-07-13 11:37:18,396][06939] Component LearnerWorker_p0 stopped!
397
+ [2025-07-13 11:37:18,396][07359] Loop learner_proc0_evt_loop terminating...
398
+ [2025-07-13 11:37:18,401][06939] Component RolloutWorker_w0 stopped!
399
+ [2025-07-13 11:37:18,401][07369] Stopping RolloutWorker_w0...
400
+ [2025-07-13 11:37:18,403][06939] Waiting for process learner_proc0 to stop...
401
+ [2025-07-13 11:37:18,404][07369] Loop rollout_proc0_evt_loop terminating...
402
+ [2025-07-13 11:37:19,546][06939] Waiting for process inference_proc0-0 to join...
403
+ [2025-07-13 11:37:19,549][06939] Waiting for process rollout_proc0 to join...
404
+ [2025-07-13 11:37:19,919][06939] Waiting for process rollout_proc1 to join...
405
+ [2025-07-13 11:37:19,921][06939] Batcher 0 profile tree view:
406
+ batching: 1.1778, releasing_batches: 0.0012
407
+ [2025-07-13 11:37:19,923][06939] InferenceWorker_p0-w0 profile tree view:
408
+ wait_policy: 0.0000
409
+ wait_policy_total: 15.3290
410
+ update_model: 1.2617
411
+ weight_update: 0.0014
412
+ one_step: 0.0035
413
+ handle_policy_step: 83.1065
414
+ deserialize: 1.7763, stack: 0.5898, obs_to_device_normalize: 20.5143, forward: 43.8813, send_messages: 2.3863
415
+ prepare_outputs: 10.0497
416
+ to_cpu: 6.0774
417
+ [2025-07-13 11:37:19,924][06939] Learner 0 profile tree view:
418
+ misc: 0.0002, prepare_batch: 1.8202
419
+ train: 5.3572
420
+ epoch_init: 0.0002, minibatch_init: 0.0003, losses_postprocess: 0.0286, kl_divergence: 0.0488, after_optimizer: 1.7859
421
+ calculate_losses: 1.6856
422
+ losses_init: 0.0002, forward_head: 0.4117, bptt_initial: 0.9225, tail: 0.0585, advantages_returns: 0.0108, losses: 0.1807
423
+ bptt: 0.0892
424
+ bptt_forward_core: 0.0844
425
+ update: 1.7855
426
+ clip: 0.0915
427
+ [2025-07-13 11:37:19,925][06939] RolloutWorker_w0 profile tree view:
428
+ wait_for_trajectories: 0.0519, enqueue_policy_requests: 12.9735, env_step: 38.3118, overhead: 1.9176, complete_rollouts: 0.5191
429
+ save_policy_outputs: 2.2653
430
+ split_output_tensors: 0.9149
431
+ [2025-07-13 11:37:19,926][06939] RolloutWorker_w1 profile tree view:
432
+ wait_for_trajectories: 0.0577, enqueue_policy_requests: 12.6762, env_step: 38.4079, overhead: 1.8928, complete_rollouts: 0.3994
433
+ save_policy_outputs: 2.4013
434
+ split_output_tensors: 0.9375
435
+ [2025-07-13 11:37:19,927][06939] Loop Runner_EvtLoop terminating...
436
+ [2025-07-13 11:37:19,930][06939] Runner profile tree view:
437
+ main_loop: 115.4634
438
+ [2025-07-13 11:37:19,931][06939] Collected {0: 204800}, FPS: 1773.7
439
+ [2025-07-13 11:37:19,942][06939] Environment doom_basic already registered, overwriting...
440
+ [2025-07-13 11:37:19,943][06939] Environment doom_two_colors_easy already registered, overwriting...
441
+ [2025-07-13 11:37:19,944][06939] Environment doom_two_colors_hard already registered, overwriting...
442
+ [2025-07-13 11:37:19,945][06939] Environment doom_dm already registered, overwriting...
443
+ [2025-07-13 11:37:19,946][06939] Environment doom_dwango5 already registered, overwriting...
444
+ [2025-07-13 11:37:19,947][06939] Environment doom_my_way_home_flat_actions already registered, overwriting...
445
+ [2025-07-13 11:37:19,947][06939] Environment doom_defend_the_center_flat_actions already registered, overwriting...
446
+ [2025-07-13 11:37:19,948][06939] Environment doom_my_way_home already registered, overwriting...
447
+ [2025-07-13 11:37:19,952][06939] Environment doom_deadly_corridor already registered, overwriting...
448
+ [2025-07-13 11:37:19,953][06939] Environment doom_defend_the_center already registered, overwriting...
449
+ [2025-07-13 11:37:19,954][06939] Environment doom_defend_the_line already registered, overwriting...
450
+ [2025-07-13 11:37:19,955][06939] Environment doom_health_gathering already registered, overwriting...
451
+ [2025-07-13 11:37:19,956][06939] Environment doom_health_gathering_supreme already registered, overwriting...
452
+ [2025-07-13 11:37:19,956][06939] Environment doom_battle already registered, overwriting...
453
+ [2025-07-13 11:37:19,957][06939] Environment doom_battle2 already registered, overwriting...
454
+ [2025-07-13 11:37:19,958][06939] Environment doom_duel_bots already registered, overwriting...
455
+ [2025-07-13 11:37:19,959][06939] Environment doom_deathmatch_bots already registered, overwriting...
456
+ [2025-07-13 11:37:19,959][06939] Environment doom_duel already registered, overwriting...
457
+ [2025-07-13 11:37:19,960][06939] Environment doom_deathmatch_full already registered, overwriting...
458
+ [2025-07-13 11:37:19,961][06939] Environment doom_benchmark already registered, overwriting...
459
+ [2025-07-13 11:37:19,962][06939] register_encoder_factory: <function make_vizdoom_encoder at 0x7f16fee9a340>
460
+ [2025-07-13 11:39:55,913][06939] Loading existing experiment configuration from ./train_dir/vizdoom_exp/config.json
461
+ [2025-07-13 11:39:55,914][06939] Adding new argument 'no_render'=False that is not in the saved config file!
462
+ [2025-07-13 11:39:55,915][06939] Adding new argument 'save_video'=True that is not in the saved config file!
463
+ [2025-07-13 11:39:55,916][06939] Adding new argument 'video_frames'=1000000000.0 that is not in the saved config file!
464
+ [2025-07-13 11:39:55,917][06939] Adding new argument 'video_name'=None that is not in the saved config file!
465
+ [2025-07-13 11:39:55,917][06939] Adding new argument 'max_num_frames'=1000000000.0 that is not in the saved config file!
466
+ [2025-07-13 11:39:55,918][06939] Adding new argument 'max_num_episodes'=5 that is not in the saved config file!
467
+ [2025-07-13 11:39:55,919][06939] Adding new argument 'push_to_hub'=False that is not in the saved config file!
468
+ [2025-07-13 11:39:55,920][06939] Adding new argument 'hf_repository'=None that is not in the saved config file!
469
+ [2025-07-13 11:39:55,921][06939] Adding new argument 'policy_index'=0 that is not in the saved config file!
470
+ [2025-07-13 11:39:55,922][06939] Adding new argument 'eval_deterministic'=False that is not in the saved config file!
471
+ [2025-07-13 11:39:55,922][06939] Adding new argument 'train_script'=None that is not in the saved config file!
472
+ [2025-07-13 11:39:55,923][06939] Adding new argument 'enjoy_script'=None that is not in the saved config file!
473
+ [2025-07-13 11:39:55,924][06939] Using frameskip 1 and render_action_repeat=4 for evaluation
474
+ [2025-07-13 11:39:55,953][06939] Doom resolution: 160x120, resize resolution: (128, 72)
475
+ [2025-07-13 11:39:55,957][06939] RunningMeanStd input shape: (3, 72, 128)
476
+ [2025-07-13 11:39:55,959][06939] RunningMeanStd input shape: (1,)
477
+ [2025-07-13 11:39:55,974][06939] ConvEncoder: input_channels=3
478
+ [2025-07-13 11:39:56,088][06939] Conv encoder output size: 512
479
+ [2025-07-13 11:39:56,090][06939] Policy head output size: 512
480
+ [2025-07-13 11:39:56,298][06939] Loading state from checkpoint ./train_dir/vizdoom_exp/checkpoint_p0/checkpoint_000000053_217088.pth...
481
+ [2025-07-13 11:39:56,305][06939] Could not load from checkpoint, attempt 0
482
+ Traceback (most recent call last):
483
+ File "/usr/local/lib/python3.11/dist-packages/sample_factory/algo/learning/learner.py", line 281, in load_checkpoint
484
+ checkpoint_dict = torch.load(latest_checkpoint, map_location=device)
485
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
486
+ File "/usr/local/lib/python3.11/dist-packages/torch/serialization.py", line 1470, in load
487
+ raise pickle.UnpicklingError(_get_wo_message(str(e))) from None
488
+ _pickle.UnpicklingError: Weights only load failed. This file can still be loaded, to do so you have two options, do those steps only if you trust the source of the checkpoint.
489
+ (1) In PyTorch 2.6, we changed the default value of the `weights_only` argument in `torch.load` from `False` to `True`. Re-running `torch.load` with `weights_only` set to `False` will likely succeed, but it can result in arbitrary code execution. Do it only if you got the file from a trusted source.
490
+ (2) Alternatively, to load with `weights_only=True` please check the recommended steps in the following error message.
491
+ WeightsUnpickler error: Unsupported global: GLOBAL numpy.core.multiarray.scalar was not an allowed global by default. Please use `torch.serialization.add_safe_globals([scalar])` or the `torch.serialization.safe_globals([scalar])` context manager to allowlist this global if you trust this class/function.
492
+
493
+ Check the documentation of torch.load to learn more about types accepted by default with weights_only https://pytorch.org/docs/stable/generated/torch.load.html.
494
+ [2025-07-13 11:39:56,308][06939] Loading state from checkpoint ./train_dir/vizdoom_exp/checkpoint_p0/checkpoint_000000053_217088.pth...
495
+ [2025-07-13 11:39:56,310][06939] Could not load from checkpoint, attempt 1
496
+ Traceback (most recent call last):
497
+ File "/usr/local/lib/python3.11/dist-packages/sample_factory/algo/learning/learner.py", line 281, in load_checkpoint
498
+ checkpoint_dict = torch.load(latest_checkpoint, map_location=device)
499
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
500
+ File "/usr/local/lib/python3.11/dist-packages/torch/serialization.py", line 1470, in load
501
+ raise pickle.UnpicklingError(_get_wo_message(str(e))) from None
502
+ _pickle.UnpicklingError: Weights only load failed. This file can still be loaded, to do so you have two options, do those steps only if you trust the source of the checkpoint.
503
+ (1) In PyTorch 2.6, we changed the default value of the `weights_only` argument in `torch.load` from `False` to `True`. Re-running `torch.load` with `weights_only` set to `False` will likely succeed, but it can result in arbitrary code execution. Do it only if you got the file from a trusted source.
504
+ (2) Alternatively, to load with `weights_only=True` please check the recommended steps in the following error message.
505
+ WeightsUnpickler error: Unsupported global: GLOBAL numpy.core.multiarray.scalar was not an allowed global by default. Please use `torch.serialization.add_safe_globals([scalar])` or the `torch.serialization.safe_globals([scalar])` context manager to allowlist this global if you trust this class/function.
506
+
507
+ Check the documentation of torch.load to learn more about types accepted by default with weights_only https://pytorch.org/docs/stable/generated/torch.load.html.
508
+ [2025-07-13 11:39:56,311][06939] Loading state from checkpoint ./train_dir/vizdoom_exp/checkpoint_p0/checkpoint_000000053_217088.pth...
509
+ [2025-07-13 11:39:56,313][06939] Could not load from checkpoint, attempt 2
510
+ Traceback (most recent call last):
511
+ File "/usr/local/lib/python3.11/dist-packages/sample_factory/algo/learning/learner.py", line 281, in load_checkpoint
512
+ checkpoint_dict = torch.load(latest_checkpoint, map_location=device)
513
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
514
+ File "/usr/local/lib/python3.11/dist-packages/torch/serialization.py", line 1470, in load
515
+ raise pickle.UnpicklingError(_get_wo_message(str(e))) from None
516
+ _pickle.UnpicklingError: Weights only load failed. This file can still be loaded, to do so you have two options, do those steps only if you trust the source of the checkpoint.
517
+ (1) In PyTorch 2.6, we changed the default value of the `weights_only` argument in `torch.load` from `False` to `True`. Re-running `torch.load` with `weights_only` set to `False` will likely succeed, but it can result in arbitrary code execution. Do it only if you got the file from a trusted source.
+ (2) Alternatively, to load with `weights_only=True` please check the recommended steps in the following error message.
+ WeightsUnpickler error: Unsupported global: GLOBAL numpy.core.multiarray.scalar was not an allowed global by default. Please use `torch.serialization.add_safe_globals([scalar])` or the `torch.serialization.safe_globals([scalar])` context manager to allowlist this global if you trust this class/function.
+
+ Check the documentation of torch.load to learn more about types accepted by default with weights_only https://pytorch.org/docs/stable/generated/torch.load.html.
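The three failed load attempts above share one root cause: PyTorch 2.6 changed the default of `weights_only` in `torch.load` from `False` to `True`, and this checkpoint pickles `numpy.core.multiarray.scalar`, which is not on the default allowlist. A minimal sketch of the two remediations the error message itself names follows; the path is copied from the log, and both options assume you trust the checkpoint.

```python
# Minimal sketch of the two fixes named in the error message above.
# Assumes the checkpoint is trusted; the path is copied from this log.
import numpy as np
import torch

ckpt_path = "./train_dir/vizdoom_exp/checkpoint_p0/checkpoint_000000053_217088.pth"

# Option 1: opt out of the new PyTorch 2.6 default. This executes arbitrary
# pickle code, so only use it for checkpoints from a trusted source.
checkpoint = torch.load(ckpt_path, map_location="cpu", weights_only=False)

# Option 2: keep weights_only=True and allowlist only the offending global
# named in the error. Under numpy 2.x the numpy.core path still resolves
# through the backwards-compatibility shim (with a DeprecationWarning).
with torch.serialization.safe_globals([np.core.multiarray.scalar]):
    checkpoint = torch.load(ckpt_path, map_location="cpu", weights_only=True)
```

Either change would let `load_checkpoint` in Sample-Factory's `learner.py` succeed; pinning `torch<2.6` sidesteps the new default entirely.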
+ [2025-07-13 11:41:57,536][06939] Loading existing experiment configuration from ./train_dir/vizdoom_exp/config.json
+ [2025-07-13 11:41:57,537][06939] Adding new argument 'no_render'=False that is not in the saved config file!
+ [2025-07-13 11:41:57,539][06939] Adding new argument 'save_video'=True that is not in the saved config file!
+ [2025-07-13 11:41:57,543][06939] Adding new argument 'video_frames'=1000000000.0 that is not in the saved config file!
+ [2025-07-13 11:41:57,547][06939] Adding new argument 'video_name'=None that is not in the saved config file!
+ [2025-07-13 11:41:57,548][06939] Adding new argument 'max_num_frames'=1000000000.0 that is not in the saved config file!
+ [2025-07-13 11:41:57,549][06939] Adding new argument 'max_num_episodes'=5 that is not in the saved config file!
+ [2025-07-13 11:41:57,550][06939] Adding new argument 'push_to_hub'=False that is not in the saved config file!
+ [2025-07-13 11:41:57,551][06939] Adding new argument 'hf_repository'=None that is not in the saved config file!
+ [2025-07-13 11:41:57,552][06939] Adding new argument 'policy_index'=0 that is not in the saved config file!
+ [2025-07-13 11:41:57,553][06939] Adding new argument 'eval_deterministic'=False that is not in the saved config file!
+ [2025-07-13 11:41:57,554][06939] Adding new argument 'train_script'=None that is not in the saved config file!
+ [2025-07-13 11:41:57,555][06939] Adding new argument 'enjoy_script'=None that is not in the saved config file!
+ [2025-07-13 11:41:57,556][06939] Using frameskip 1 and render_action_repeat=4 for evaluation
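The "Adding new argument" lines above (and the "Overriding arg" lines in the later runs) show how evaluation reconciles configuration: values saved in `config.json` at training time are loaded first, command-line flags replace any that conflict, and evaluation-only flags that did not exist at training time are appended. A rough sketch of that merge behavior, for illustration only, not Sample-Factory's actual implementation:

```python
# Illustrative sketch of the config-merge behavior these log lines describe;
# NOT Sample-Factory's actual code, just the same three cases it reports.
import json

def merge_eval_config(saved_cfg_path: str, cli_args: dict) -> dict:
    with open(saved_cfg_path) as f:
        cfg = json.load(f)  # "Loading existing experiment configuration ..."
    for name, value in cli_args.items():
        if name not in cfg:
            print(f"Adding new argument {name!r}={value} that is not in the saved config file!")
        elif cfg[name] != value:
            print(f"Overriding arg {name!r} with value {value} passed from command line")
        cfg[name] = value
    return cfg

# e.g. with some of the flags visible in this log:
# merge_eval_config("./train_dir/vizdoom_exp/config.json",
#                   {"no_render": False, "save_video": True, "max_num_episodes": 5})
```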
+ [2025-07-13 11:41:57,602][06939] RunningMeanStd input shape: (3, 72, 128)
+ [2025-07-13 11:41:57,603][06939] RunningMeanStd input shape: (1,)
+ [2025-07-13 11:41:57,618][06939] ConvEncoder: input_channels=3
+ [2025-07-13 11:41:57,675][06939] Conv encoder output size: 512
+ [2025-07-13 11:41:57,676][06939] Policy head output size: 512
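The model-construction lines trace the actor-critic's input path: running mean/std normalizers are created for the (3, 72, 128) image observations and the scalar returns, a convolutional encoder maps each frame to a 512-dimensional embedding, and the policy head likewise outputs 512 units. A rough PyTorch sketch of that shape flow follows; only the input shape and the two 512s come from the log, and the specific conv stack is an assumption, not Sample-Factory's exact architecture.

```python
# Rough sketch of the shape flow the log lines above describe. Only the
# (3, 72, 128) input and the 512-dim encoder/head sizes come from the log;
# the particular conv stack is an assumption.
import torch
import torch.nn as nn

class ConvEncoderSketch(nn.Module):
    def __init__(self, input_channels: int = 3, out_size: int = 512):
        super().__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(input_channels, 32, kernel_size=8, stride=4), nn.ELU(),
            nn.Conv2d(32, 64, kernel_size=4, stride=2), nn.ELU(),
            nn.Conv2d(64, 128, kernel_size=3, stride=2), nn.ELU(),
            nn.Flatten(),
        )
        with torch.no_grad():  # infer the flattened size from a dummy frame
            n_flat = self.conv(torch.zeros(1, input_channels, 72, 128)).shape[1]
        self.fc = nn.Linear(n_flat, out_size)      # "Conv encoder output size: 512"
        self.head = nn.Linear(out_size, out_size)  # "Policy head output size: 512"

    def forward(self, obs: torch.Tensor) -> torch.Tensor:
        return self.head(torch.relu(self.fc(self.conv(obs))))

frames = torch.zeros(4, 3, 72, 128)       # a batch of normalized observations
print(ConvEncoderSketch()(frames).shape)  # torch.Size([4, 512])
```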
+ [2025-07-13 11:42:44,481][06939] Loading existing experiment configuration from ./train_dir/vizdoom_exp/config.json
+ [2025-07-13 11:42:44,483][06939] Adding new argument 'no_render'=False that is not in the saved config file!
+ [2025-07-13 11:42:44,484][06939] Adding new argument 'save_video'=True that is not in the saved config file!
+ [2025-07-13 11:42:44,486][06939] Adding new argument 'video_frames'=1000000000.0 that is not in the saved config file!
+ [2025-07-13 11:42:44,489][06939] Adding new argument 'video_name'=None that is not in the saved config file!
+ [2025-07-13 11:42:44,489][06939] Adding new argument 'max_num_frames'=1000000000.0 that is not in the saved config file!
+ [2025-07-13 11:42:44,491][06939] Adding new argument 'max_num_episodes'=5 that is not in the saved config file!
+ [2025-07-13 11:42:44,492][06939] Adding new argument 'push_to_hub'=False that is not in the saved config file!
+ [2025-07-13 11:42:44,492][06939] Adding new argument 'hf_repository'=None that is not in the saved config file!
+ [2025-07-13 11:42:44,493][06939] Adding new argument 'policy_index'=0 that is not in the saved config file!
+ [2025-07-13 11:42:44,494][06939] Adding new argument 'eval_deterministic'=False that is not in the saved config file!
+ [2025-07-13 11:42:44,494][06939] Adding new argument 'train_script'=None that is not in the saved config file!
+ [2025-07-13 11:42:44,499][06939] Adding new argument 'enjoy_script'=None that is not in the saved config file!
+ [2025-07-13 11:42:44,501][06939] Using frameskip 1 and render_action_repeat=4 for evaluation
+ [2025-07-13 11:42:44,558][06939] RunningMeanStd input shape: (3, 72, 128)
+ [2025-07-13 11:42:44,560][06939] RunningMeanStd input shape: (1,)
+ [2025-07-13 11:42:44,577][06939] ConvEncoder: input_channels=3
+ [2025-07-13 11:42:44,631][06939] Conv encoder output size: 512
+ [2025-07-13 11:42:44,633][06939] Policy head output size: 512
+ [2025-07-13 11:43:21,556][06939] Loading existing experiment configuration from ./train_dir/vizdoom_exp/config.json
+ [2025-07-13 11:43:21,558][06939] Adding new argument 'no_render'=True that is not in the saved config file!
+ [2025-07-13 11:43:21,559][06939] Adding new argument 'save_video'=True that is not in the saved config file!
+ [2025-07-13 11:43:21,560][06939] Adding new argument 'video_frames'=1000000000.0 that is not in the saved config file!
+ [2025-07-13 11:43:21,562][06939] Adding new argument 'video_name'=None that is not in the saved config file!
+ [2025-07-13 11:43:21,563][06939] Adding new argument 'max_num_frames'=1000000000.0 that is not in the saved config file!
+ [2025-07-13 11:43:21,564][06939] Adding new argument 'max_num_episodes'=5 that is not in the saved config file!
+ [2025-07-13 11:43:21,565][06939] Adding new argument 'push_to_hub'=False that is not in the saved config file!
+ [2025-07-13 11:43:21,567][06939] Adding new argument 'hf_repository'=None that is not in the saved config file!
+ [2025-07-13 11:43:21,568][06939] Adding new argument 'policy_index'=0 that is not in the saved config file!
+ [2025-07-13 11:43:21,569][06939] Adding new argument 'eval_deterministic'=False that is not in the saved config file!
+ [2025-07-13 11:43:21,570][06939] Adding new argument 'train_script'=None that is not in the saved config file!
+ [2025-07-13 11:43:21,571][06939] Adding new argument 'enjoy_script'=None that is not in the saved config file!
+ [2025-07-13 11:43:21,572][06939] Using frameskip 1 and render_action_repeat=4 for evaluation
+ [2025-07-13 11:43:21,603][06939] RunningMeanStd input shape: (3, 72, 128)
+ [2025-07-13 11:43:21,604][06939] RunningMeanStd input shape: (1,)
+ [2025-07-13 11:43:21,615][06939] ConvEncoder: input_channels=3
+ [2025-07-13 11:43:21,654][06939] Conv encoder output size: 512
+ [2025-07-13 11:43:21,657][06939] Policy head output size: 512
+ [2025-07-13 11:43:22,337][06939] Num frames 100...
+ [2025-07-13 11:43:22,472][06939] Num frames 200...
+ [2025-07-13 11:43:22,604][06939] Num frames 300...
+ [2025-07-13 11:43:22,737][06939] Num frames 400...
+ [2025-07-13 11:43:22,814][06939] Avg episode rewards: #0: 5.160, true rewards: #0: 4.160
+ [2025-07-13 11:43:22,815][06939] Avg episode reward: 5.160, avg true_objective: 4.160
+ [2025-07-13 11:43:22,936][06939] Num frames 500...
+ [2025-07-13 11:43:23,072][06939] Num frames 600...
+ [2025-07-13 11:43:23,202][06939] Num frames 700...
+ [2025-07-13 11:43:23,349][06939] Avg episode rewards: #0: 4.840, true rewards: #0: 3.840
+ [2025-07-13 11:43:23,350][06939] Avg episode reward: 4.840, avg true_objective: 3.840
+ [2025-07-13 11:43:23,395][06939] Num frames 800...
+ [2025-07-13 11:43:23,523][06939] Num frames 900...
+ [2025-07-13 11:43:23,655][06939] Num frames 1000...
+ [2025-07-13 11:43:23,790][06939] Num frames 1100...
+ [2025-07-13 11:43:23,911][06939] Avg episode rewards: #0: 4.507, true rewards: #0: 3.840
+ [2025-07-13 11:43:23,912][06939] Avg episode reward: 4.507, avg true_objective: 3.840
+ [2025-07-13 11:43:23,984][06939] Num frames 1200...
+ [2025-07-13 11:43:24,109][06939] Num frames 1300...
+ [2025-07-13 11:43:24,240][06939] Num frames 1400...
+ [2025-07-13 11:43:24,368][06939] Num frames 1500...
+ [2025-07-13 11:43:24,473][06939] Avg episode rewards: #0: 4.340, true rewards: #0: 3.840
+ [2025-07-13 11:43:24,474][06939] Avg episode reward: 4.340, avg true_objective: 3.840
+ [2025-07-13 11:43:24,569][06939] Num frames 1600...
+ [2025-07-13 11:43:24,702][06939] Num frames 1700...
+ [2025-07-13 11:43:24,892][06939] Avg episode rewards: #0: 3.992, true rewards: #0: 3.592
+ [2025-07-13 11:43:24,893][06939] Avg episode reward: 3.992, avg true_objective: 3.592
+ [2025-07-13 11:43:36,753][06939] Replay video saved to ./train_dir/vizdoom_exp/replay.mp4!
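The "Avg episode rewards" lines in the run above appear to be running means over the episodes finished so far, which is why they drift as weaker episodes come in, with "true rewards" reporting the raw objective separately from the (possibly shaped) episode reward. The printed values are self-consistent with a running mean, as a quick back-calculation shows:

```python
# Back-calculating implied per-episode scores from the running means printed
# above (values copied from this log; episodes 2 and 3 are not printed directly).
ep1 = 5.160
ep2 = 2 * 4.840 - ep1          # -> 4.520
ep3 = 3 * 4.507 - ep1 - ep2    # -> ~3.841
print(round(ep2, 3), round(ep3, 3))
```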
+ [2025-07-13 11:49:27,400][06939] Loading existing experiment configuration from ./train_dir/vizdoom_exp/config.json
+ [2025-07-13 11:49:27,401][06939] Overriding arg 'num_workers' with value 1 passed from command line
+ [2025-07-13 11:49:27,402][06939] Adding new argument 'no_render'=True that is not in the saved config file!
+ [2025-07-13 11:49:27,403][06939] Adding new argument 'save_video'=True that is not in the saved config file!
+ [2025-07-13 11:49:27,404][06939] Adding new argument 'video_frames'=1000000000.0 that is not in the saved config file!
+ [2025-07-13 11:49:27,405][06939] Adding new argument 'video_name'=None that is not in the saved config file!
+ [2025-07-13 11:49:27,406][06939] Adding new argument 'max_num_frames'=1600 that is not in the saved config file!
+ [2025-07-13 11:49:27,407][06939] Adding new argument 'max_num_episodes'=5 that is not in the saved config file!
+ [2025-07-13 11:49:27,408][06939] Adding new argument 'push_to_hub'=True that is not in the saved config file!
+ [2025-07-13 11:49:27,409][06939] Adding new argument 'hf_repository'='lokeessshhhh/rl_vizdoom_health_gathering' that is not in the saved config file!
+ [2025-07-13 11:49:27,410][06939] Adding new argument 'policy_index'=0 that is not in the saved config file!
+ [2025-07-13 11:49:27,412][06939] Adding new argument 'eval_deterministic'=False that is not in the saved config file!
+ [2025-07-13 11:49:27,413][06939] Adding new argument 'train_script'=None that is not in the saved config file!
+ [2025-07-13 11:49:27,414][06939] Adding new argument 'enjoy_script'=None that is not in the saved config file!
+ [2025-07-13 11:49:27,417][06939] Using frameskip 1 and render_action_repeat=4 for evaluation
+ [2025-07-13 11:49:27,440][06939] RunningMeanStd input shape: (3, 72, 128)
+ [2025-07-13 11:49:27,442][06939] RunningMeanStd input shape: (1,)
+ [2025-07-13 11:49:27,452][06939] ConvEncoder: input_channels=3
+ [2025-07-13 11:49:27,490][06939] Conv encoder output size: 512
+ [2025-07-13 11:49:27,491][06939] Policy head output size: 512
+ [2025-07-13 11:49:27,932][06939] Num frames 100...
+ [2025-07-13 11:49:28,063][06939] Num frames 200...
+ [2025-07-13 11:49:28,194][06939] Num frames 300...
+ [2025-07-13 11:49:28,322][06939] Num frames 400...
+ [2025-07-13 11:49:28,439][06939] Avg episode rewards: #0: 5.480, true rewards: #0: 4.480
+ [2025-07-13 11:49:28,441][06939] Avg episode reward: 5.480, avg true_objective: 4.480
+ [2025-07-13 11:49:28,510][06939] Num frames 500...
+ [2025-07-13 11:49:28,641][06939] Num frames 600...
+ [2025-07-13 11:49:28,780][06939] Num frames 700...
+ [2025-07-13 11:49:28,911][06939] Num frames 800...
+ [2025-07-13 11:49:29,008][06939] Avg episode rewards: #0: 4.660, true rewards: #0: 4.160
+ [2025-07-13 11:49:29,009][06939] Avg episode reward: 4.660, avg true_objective: 4.160
+ [2025-07-13 11:49:29,107][06939] Num frames 900...
+ [2025-07-13 11:49:29,241][06939] Num frames 1000...
+ [2025-07-13 11:49:29,372][06939] Num frames 1100...
+ [2025-07-13 11:49:29,503][06939] Num frames 1200...
+ [2025-07-13 11:49:29,580][06939] Avg episode rewards: #0: 4.387, true rewards: #0: 4.053
+ [2025-07-13 11:49:29,581][06939] Avg episode reward: 4.387, avg true_objective: 4.053
+ [2025-07-13 11:49:29,694][06939] Num frames 1300...
+ [2025-07-13 11:49:29,843][06939] Num frames 1400...
+ [2025-07-13 11:49:29,979][06939] Num frames 1500...
+ [2025-07-13 11:49:30,108][06939] Num frames 1600...
+ [2025-07-13 11:49:39,419][06939] Replay video saved to ./train_dir/vizdoom_exp/replay.mp4!
+ [2025-07-13 11:49:50,742][06939] The model has been pushed to https://huggingface.co/lokeessshhhh/rl_vizdoom_health_gathering
+ [2025-07-13 11:56:39,097][06939] Loading existing experiment configuration from ./train_dir/vizdoom_exp/config.json
+ [2025-07-13 11:56:39,099][06939] Overriding arg 'num_workers' with value 1 passed from command line
+ [2025-07-13 11:56:39,101][06939] Adding new argument 'no_render'=True that is not in the saved config file!
+ [2025-07-13 11:56:39,102][06939] Adding new argument 'save_video'=True that is not in the saved config file!
+ [2025-07-13 11:56:39,103][06939] Adding new argument 'video_frames'=1000000000.0 that is not in the saved config file!
+ [2025-07-13 11:56:39,105][06939] Adding new argument 'video_name'=None that is not in the saved config file!
+ [2025-07-13 11:56:39,105][06939] Adding new argument 'max_num_frames'=1600 that is not in the saved config file!
+ [2025-07-13 11:56:39,108][06939] Adding new argument 'max_num_episodes'=5 that is not in the saved config file!
+ [2025-07-13 11:56:39,109][06939] Adding new argument 'push_to_hub'=True that is not in the saved config file!
+ [2025-07-13 11:56:39,109][06939] Adding new argument 'hf_repository'='lokeessshhhh/rl_vizdoom_health_gathering' that is not in the saved config file!
+ [2025-07-13 11:56:39,110][06939] Adding new argument 'policy_index'=0 that is not in the saved config file!
+ [2025-07-13 11:56:39,114][06939] Adding new argument 'eval_deterministic'=False that is not in the saved config file!
+ [2025-07-13 11:56:39,115][06939] Adding new argument 'train_script'=None that is not in the saved config file!
+ [2025-07-13 11:56:39,115][06939] Adding new argument 'enjoy_script'=None that is not in the saved config file!
+ [2025-07-13 11:56:39,118][06939] Using frameskip 1 and render_action_repeat=4 for evaluation
+ [2025-07-13 11:56:39,161][06939] RunningMeanStd input shape: (3, 72, 128)
+ [2025-07-13 11:56:39,164][06939] RunningMeanStd input shape: (1,)
+ [2025-07-13 11:56:39,181][06939] ConvEncoder: input_channels=3
+ [2025-07-13 11:56:39,235][06939] Conv encoder output size: 512
+ [2025-07-13 11:56:39,236][06939] Policy head output size: 512
+ [2025-07-13 11:56:39,919][06939] Num frames 100...
+ [2025-07-13 11:56:40,107][06939] Num frames 200...
+ [2025-07-13 11:56:40,244][06939] Num frames 300...
+ [2025-07-13 11:56:40,408][06939] Avg episode rewards: #0: 3.840, true rewards: #0: 3.840
+ [2025-07-13 11:56:40,409][06939] Avg episode reward: 3.840, avg true_objective: 3.840
+ [2025-07-13 11:56:40,432][06939] Num frames 400...
+ [2025-07-13 11:56:40,557][06939] Num frames 500...
+ [2025-07-13 11:56:40,695][06939] Num frames 600...
+ [2025-07-13 11:56:40,823][06939] Num frames 700...
+ [2025-07-13 11:56:40,969][06939] Avg episode rewards: #0: 3.840, true rewards: #0: 3.840
+ [2025-07-13 11:56:40,970][06939] Avg episode reward: 3.840, avg true_objective: 3.840
+ [2025-07-13 11:56:41,012][06939] Num frames 800...
+ [2025-07-13 11:56:41,135][06939] Num frames 900...
+ [2025-07-13 11:56:41,266][06939] Num frames 1000...
+ [2025-07-13 11:56:41,352][06939] Avg episode rewards: #0: 3.413, true rewards: #0: 3.413
+ [2025-07-13 11:56:41,353][06939] Avg episode reward: 3.413, avg true_objective: 3.413
+ [2025-07-13 11:56:41,448][06939] Num frames 1100...
+ [2025-07-13 11:56:41,573][06939] Num frames 1200...
+ [2025-07-13 11:56:41,707][06939] Num frames 1300...
+ [2025-07-13 11:56:41,833][06939] Num frames 1400...
+ [2025-07-13 11:56:41,897][06939] Avg episode rewards: #0: 3.520, true rewards: #0: 3.520
+ [2025-07-13 11:56:41,899][06939] Avg episode reward: 3.520, avg true_objective: 3.520
+ [2025-07-13 11:56:42,019][06939] Num frames 1500...
+ [2025-07-13 11:56:42,146][06939] Num frames 1600...
+ [2025-07-13 11:56:50,594][06939] Replay video saved to ./train_dir/vizdoom_exp/replay.mp4!