lokeessshhhh committed
Commit 709db59 · verified · 1 Parent(s): 114a9c7

Upload folder using huggingface_hub
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+replay.mp4 filter=lfs diff=lfs merge=lfs -text
.summary/0/events.out.tfevents.1752406344.7ee6365361bc ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dc98f0422f716f0bebd12e0b5ff32b15f4cdc828223e68941422af2f924a08d1
+size 71424
.summary/0/events.out.tfevents.1752406515.7ee6365361bc ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cb5cf132b02e682e1d7a043080a6d13bcd473a9ba34ccca634d40f8168e1571d
+size 68936
README.md ADDED
@@ -0,0 +1,56 @@
+---
+library_name: sample-factory
+tags:
+- deep-reinforcement-learning
+- reinforcement-learning
+- sample-factory
+model-index:
+- name: APPO
+  results:
+  - task:
+      type: reinforcement-learning
+      name: reinforcement-learning
+    dataset:
+      name: doom_health_gathering_supreme
+      type: doom_health_gathering_supreme
+    metrics:
+    - type: mean_reward
+      value: 3.52 +/- 0.55
+      name: mean_reward
+      verified: false
+---
+
+An **APPO** model trained on the **doom_health_gathering_supreme** environment.
+
+This model was trained using Sample-Factory 2.0: https://github.com/alex-petrenko/sample-factory.
+Documentation for how to use Sample-Factory can be found at https://www.samplefactory.dev/
+
+
+## Downloading the model
+
+After installing Sample-Factory, download the model with:
+```
+python -m sample_factory.huggingface.load_from_hub -r lokeessshhhh/rl_vizdoom_health_gathering
+```
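+
+If you want the downloaded files under the `./train_dir` used by the commands below, the Sample-Factory Hugging Face docs also describe a `-d` flag that sets the download directory (a sketch; check the flag against your installed version):
+```
+python -m sample_factory.huggingface.load_from_hub -r lokeessshhhh/rl_vizdoom_health_gathering -d ./train_dir
+```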
+
+
+## Using the model
+
+To run the model after download, use the `enjoy` script corresponding to this environment:
+```
+python -m <path.to.enjoy.module> --algo=APPO --env=doom_health_gathering_supreme --train_dir=./train_dir --experiment=rl_vizdoom_health_gathering
+```
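+
+For ViZDoom environments, the enjoy module shipped with the Sample-Factory examples is `sf_examples.vizdoom.enjoy_vizdoom` (an assumption based on the upstream repository layout; substitute your own module if you registered the environment elsewhere), so a concrete invocation would be:
+```
+python -m sf_examples.vizdoom.enjoy_vizdoom --algo=APPO --env=doom_health_gathering_supreme --train_dir=./train_dir --experiment=rl_vizdoom_health_gathering
+```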
+
+
+You can also upload models to the Hugging Face Hub using the same script with the `--push_to_hub` flag.
+See https://www.samplefactory.dev/10-huggingface/huggingface/ for more details.
+
+
+## Training with this model
+
+To continue training with this model, use the `train` script corresponding to this environment:
+```
+python -m <path.to.train.module> --algo=APPO --env=doom_health_gathering_supreme --train_dir=./train_dir --experiment=rl_vizdoom_health_gathering --restart_behavior=resume --train_for_env_steps=10000000000
+```
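+
+Likewise, for ViZDoom the example train module is `sf_examples.vizdoom.train_vizdoom` (again an assumption based on the upstream repository layout):
+```
+python -m sf_examples.vizdoom.train_vizdoom --algo=APPO --env=doom_health_gathering_supreme --train_dir=./train_dir --experiment=rl_vizdoom_health_gathering --restart_behavior=resume --train_for_env_steps=10000000000
+```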
+
+Note: you may need to set `--train_for_env_steps` to a suitably high number, because the experiment resumes from the step count at which it previously stopped.
+
checkpoint_p0/best_000000041_167936_reward_4.766.pth ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ee735185d5c0abf6417575a494c0d262bb25bd74e88ccf4502ed03c8f98c612f
+size 34929051
checkpoint_p0/checkpoint_000000051_208896.pth ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:945044c2189c3dcb74e02f1fe84203a3a416e8ffd3c9fc3fedb9ce2ed37d7087
+size 34929477
checkpoint_p0/checkpoint_000000053_217088.pth ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cf9cde09fbf8c2bfb97d48a33ad37407e78cd2b884828bd0954bf6f46f6fa5e5
+size 34929477
config.json ADDED
@@ -0,0 +1,143 @@
+{
+  "help": false,
+  "algo": "APPO",
+  "env": "doom_health_gathering_supreme",
+  "experiment": "vizdoom_exp",
+  "train_dir": "./train_dir",
+  "restart_behavior": "resume",
+  "device": "gpu",
+  "seed": null,
+  "num_policies": 1,
+  "async_rl": true,
+  "serial_mode": false,
+  "batched_sampling": false,
+  "num_batches_to_accumulate": 2,
+  "worker_num_splits": 2,
+  "policy_workers_per_policy": 1,
+  "max_policy_lag": 1000,
+  "num_workers": 2,
+  "num_envs_per_worker": 2,
+  "batch_size": 1024,
+  "num_batches_per_epoch": 1,
+  "num_epochs": 1,
+  "rollout": 32,
+  "recurrence": 32,
+  "shuffle_minibatches": false,
+  "gamma": 0.99,
+  "reward_scale": 1.0,
+  "reward_clip": 1000.0,
+  "value_bootstrap": false,
+  "normalize_returns": true,
+  "exploration_loss_coeff": 0.001,
+  "value_loss_coeff": 0.5,
+  "kl_loss_coeff": 0.0,
+  "exploration_loss": "symmetric_kl",
+  "gae_lambda": 0.95,
+  "ppo_clip_ratio": 0.1,
+  "ppo_clip_value": 0.2,
+  "with_vtrace": false,
+  "vtrace_rho": 1.0,
+  "vtrace_c": 1.0,
+  "optimizer": "adam",
+  "adam_eps": 1e-06,
+  "adam_beta1": 0.9,
+  "adam_beta2": 0.999,
+  "max_grad_norm": 4.0,
+  "learning_rate": 0.0001,
+  "lr_schedule": "constant",
+  "lr_schedule_kl_threshold": 0.008,
+  "lr_adaptive_min": 1e-06,
+  "lr_adaptive_max": 0.01,
+  "obs_subtract_mean": 0.0,
+  "obs_scale": 255.0,
+  "normalize_input": true,
+  "normalize_input_keys": null,
+  "decorrelate_experience_max_seconds": 0,
+  "decorrelate_envs_on_one_worker": true,
+  "actor_worker_gpus": [],
+  "set_workers_cpu_affinity": true,
+  "force_envs_single_thread": false,
+  "default_niceness": 0,
+  "log_to_file": true,
+  "experiment_summaries_interval": 10,
+  "flush_summaries_interval": 30,
+  "stats_avg": 100,
+  "summaries_use_frameskip": true,
+  "heartbeat_interval": 20,
+  "heartbeat_reporting_interval": 600,
+  "train_for_env_steps": 200000,
+  "train_for_seconds": 10000000000,
+  "save_every_sec": 120,
+  "keep_checkpoints": 2,
+  "load_checkpoint_kind": "latest",
+  "save_milestones_sec": -1,
+  "save_best_every_sec": 5,
+  "save_best_metric": "reward",
+  "save_best_after": 100000,
+  "benchmark": false,
+  "encoder_mlp_layers": [
+    512,
+    512
+  ],
+  "encoder_conv_architecture": "convnet_simple",
+  "encoder_conv_mlp_layers": [
+    512
+  ],
+  "use_rnn": true,
+  "rnn_size": 512,
+  "rnn_type": "gru",
+  "rnn_num_layers": 1,
+  "decoder_mlp_layers": [],
+  "nonlinearity": "elu",
+  "policy_initialization": "orthogonal",
+  "policy_init_gain": 1.0,
+  "actor_critic_share_weights": true,
+  "adaptive_stddev": true,
+  "continuous_tanh_scale": 0.0,
+  "initial_stddev": 1.0,
+  "use_env_info_cache": false,
+  "env_gpu_actions": false,
+  "env_gpu_observations": true,
+  "env_frameskip": 4,
+  "env_framestack": 1,
+  "pixel_format": "CHW",
+  "use_record_episode_statistics": false,
+  "with_wandb": false,
+  "wandb_user": null,
+  "wandb_project": "sample_factory",
+  "wandb_group": null,
+  "wandb_job_type": "SF",
+  "wandb_tags": [],
+  "with_pbt": false,
+  "pbt_mix_policies_in_one_env": true,
+  "pbt_period_env_steps": 5000000,
+  "pbt_start_mutation": 20000000,
+  "pbt_replace_fraction": 0.3,
+  "pbt_mutation_rate": 0.15,
+  "pbt_replace_reward_gap": 0.1,
+  "pbt_replace_reward_gap_absolute": 1e-06,
+  "pbt_optimize_gamma": false,
+  "pbt_target_objective": "true_objective",
+  "pbt_perturb_min": 1.1,
+  "pbt_perturb_max": 1.5,
+  "num_agents": -1,
+  "num_humans": 0,
+  "num_bots": -1,
+  "start_bot_difficulty": null,
+  "timelimit": null,
+  "res_w": 128,
+  "res_h": 72,
+  "wide_aspect_ratio": false,
+  "eval_env_frameskip": 1,
+  "fps": 35,
+  "command_line": "--env=doom_health_gathering_supreme --experiment=vizdoom_exp --train_dir=./train_dir --num_workers=2 --num_envs_per_worker=2",
+  "cli_args": {
+    "env": "doom_health_gathering_supreme",
+    "experiment": "vizdoom_exp",
+    "train_dir": "./train_dir",
+    "num_workers": 2,
+    "num_envs_per_worker": 2
+  },
+  "git_hash": "unknown",
+  "git_repo_name": "not a git repository"
+}
replay.mp4 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c012dd4e71bbaffeb8d176b43f81991a69b62e99cb3e61c650b87c5c07cda2ba
+size 2306568
sf_log.txt ADDED
@@ -0,0 +1,695 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [2025-07-13 11:32:27,039][02343] Saving configuration to ./train_dir/vizdoom_exp/config.json...
2
+ [2025-07-13 11:32:27,041][02343] Rollout worker 0 uses device cpu
3
+ [2025-07-13 11:32:27,042][02343] Rollout worker 1 uses device cpu
4
+ [2025-07-13 11:32:27,189][02343] Using GPUs [0] for process 0 (actually maps to GPUs [0])
5
+ [2025-07-13 11:32:27,190][02343] InferenceWorker_p0-w0: min num requests: 1
6
+ [2025-07-13 11:32:27,200][02343] Starting all processes...
7
+ [2025-07-13 11:32:27,201][02343] Starting process learner_proc0
8
+ [2025-07-13 11:32:27,262][02343] Starting all processes...
9
+ [2025-07-13 11:32:27,268][02343] Starting process inference_proc0-0
10
+ [2025-07-13 11:32:27,269][02343] Starting process rollout_proc0
11
+ [2025-07-13 11:32:27,269][02343] Starting process rollout_proc1
12
+ [2025-07-13 11:32:33,042][06118] Worker 1 uses CPU cores [1]
13
+ [2025-07-13 11:32:33,235][06119] Using GPUs [0] for process 0 (actually maps to GPUs [0])
14
+ [2025-07-13 11:32:33,235][06119] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for inference process 0
15
+ [2025-07-13 11:32:33,255][06119] Num visible devices: 1
16
+ [2025-07-13 11:32:33,310][06117] Worker 0 uses CPU cores [0]
17
+ [2025-07-13 11:32:33,348][06110] Using GPUs [0] for process 0 (actually maps to GPUs [0])
18
+ [2025-07-13 11:32:33,348][06110] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for learning process 0
19
+ [2025-07-13 11:32:33,365][06110] Num visible devices: 1
20
+ [2025-07-13 11:32:33,365][06110] Starting seed is not provided
21
+ [2025-07-13 11:32:33,366][06110] Using GPUs [0] for process 0 (actually maps to GPUs [0])
22
+ [2025-07-13 11:32:33,366][06110] Initializing actor-critic model on device cuda:0
23
+ [2025-07-13 11:32:33,366][06110] RunningMeanStd input shape: (3, 72, 128)
24
+ [2025-07-13 11:32:33,369][06110] RunningMeanStd input shape: (1,)
25
+ [2025-07-13 11:32:33,380][06110] ConvEncoder: input_channels=3
26
+ [2025-07-13 11:32:33,653][06110] Conv encoder output size: 512
27
+ [2025-07-13 11:32:33,653][06110] Policy head output size: 512
28
+ [2025-07-13 11:32:33,708][06110] Created Actor Critic model with architecture:
29
+ [2025-07-13 11:32:33,709][06110] ActorCriticSharedWeights(
30
+ (obs_normalizer): ObservationNormalizer(
31
+ (running_mean_std): RunningMeanStdDictInPlace(
32
+ (running_mean_std): ModuleDict(
33
+ (obs): RunningMeanStdInPlace()
34
+ )
35
+ )
36
+ )
37
+ (returns_normalizer): RecursiveScriptModule(original_name=RunningMeanStdInPlace)
38
+ (encoder): VizdoomEncoder(
39
+ (basic_encoder): ConvEncoder(
40
+ (enc): RecursiveScriptModule(
41
+ original_name=ConvEncoderImpl
42
+ (conv_head): RecursiveScriptModule(
43
+ original_name=Sequential
44
+ (0): RecursiveScriptModule(original_name=Conv2d)
45
+ (1): RecursiveScriptModule(original_name=ELU)
46
+ (2): RecursiveScriptModule(original_name=Conv2d)
47
+ (3): RecursiveScriptModule(original_name=ELU)
48
+ (4): RecursiveScriptModule(original_name=Conv2d)
49
+ (5): RecursiveScriptModule(original_name=ELU)
50
+ )
51
+ (mlp_layers): RecursiveScriptModule(
52
+ original_name=Sequential
53
+ (0): RecursiveScriptModule(original_name=Linear)
54
+ (1): RecursiveScriptModule(original_name=ELU)
55
+ )
56
+ )
57
+ )
58
+ )
59
+ (core): ModelCoreRNN(
60
+ (core): GRU(512, 512)
61
+ )
62
+ (decoder): MlpDecoder(
63
+ (mlp): Identity()
64
+ )
65
+ (critic_linear): Linear(in_features=512, out_features=1, bias=True)
66
+ (action_parameterization): ActionParameterizationDefault(
67
+ (distribution_linear): Linear(in_features=512, out_features=5, bias=True)
68
+ )
69
+ )
70
+ [2025-07-13 11:32:33,948][06110] Using optimizer <class 'torch.optim.adam.Adam'>
71
+ [2025-07-13 11:32:38,823][06110] No checkpoints found
72
+ [2025-07-13 11:32:38,823][06110] Did not load from checkpoint, starting from scratch!
73
+ [2025-07-13 11:32:38,823][06110] Initialized policy 0 weights for model version 0
74
+ [2025-07-13 11:32:38,826][06110] LearnerWorker_p0 finished initialization!
75
+ [2025-07-13 11:32:38,827][06110] Using GPUs [0] for process 0 (actually maps to GPUs [0])
76
+ [2025-07-13 11:32:38,965][06119] RunningMeanStd input shape: (3, 72, 128)
77
+ [2025-07-13 11:32:38,966][06119] RunningMeanStd input shape: (1,)
78
+ [2025-07-13 11:32:38,977][06119] ConvEncoder: input_channels=3
79
+ [2025-07-13 11:32:39,078][06119] Conv encoder output size: 512
80
+ [2025-07-13 11:32:39,079][06119] Policy head output size: 512
81
+ [2025-07-13 11:32:39,115][02343] Inference worker 0-0 is ready!
82
+ [2025-07-13 11:32:39,117][02343] All inference workers are ready! Signal rollout workers to start!
83
+ [2025-07-13 11:32:39,167][06118] Doom resolution: 160x120, resize resolution: (128, 72)
84
+ [2025-07-13 11:32:39,177][06117] Doom resolution: 160x120, resize resolution: (128, 72)
85
+ [2025-07-13 11:32:39,251][02343] Fps is (10 sec: nan, 60 sec: nan, 300 sec: nan). Total num frames: 0. Throughput: 0: nan. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
86
+ [2025-07-13 11:32:39,562][06118] Decorrelating experience for 0 frames...
87
+ [2025-07-13 11:32:39,581][06117] Decorrelating experience for 0 frames...
88
+ [2025-07-13 11:32:40,000][06118] Decorrelating experience for 32 frames...
89
+ [2025-07-13 11:32:40,013][06117] Decorrelating experience for 32 frames...
90
+ [2025-07-13 11:32:44,253][02343] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 138.2. Samples: 691. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
91
+ [2025-07-13 11:32:44,254][02343] Avg episode reward: [(0, '4.091')]
92
+ [2025-07-13 11:32:47,181][02343] Heartbeat connected on Batcher_0
93
+ [2025-07-13 11:32:47,185][02343] Heartbeat connected on LearnerWorker_p0
94
+ [2025-07-13 11:32:47,194][02343] Heartbeat connected on InferenceWorker_p0-w0
95
+ [2025-07-13 11:32:47,199][02343] Heartbeat connected on RolloutWorker_w0
96
+ [2025-07-13 11:32:47,202][02343] Heartbeat connected on RolloutWorker_w1
97
+ [2025-07-13 11:32:49,255][02343] Fps is (10 sec: 1228.3, 60 sec: 1228.3, 300 sec: 1228.3). Total num frames: 12288. Throughput: 0: 305.8. Samples: 3059. Policy #0 lag: (min: 0.0, avg: 0.1, max: 1.0)
98
+ [2025-07-13 11:32:49,257][02343] Avg episode reward: [(0, '4.554')]
99
+ [2025-07-13 11:32:54,251][02343] Fps is (10 sec: 2048.3, 60 sec: 1365.3, 300 sec: 1365.3). Total num frames: 20480. Throughput: 0: 395.1. Samples: 5927. Policy #0 lag: (min: 0.0, avg: 0.0, max: 1.0)
100
+ [2025-07-13 11:32:54,256][02343] Avg episode reward: [(0, '4.525')]
101
+ [2025-07-13 11:32:59,251][02343] Fps is (10 sec: 2048.8, 60 sec: 1638.4, 300 sec: 1638.4). Total num frames: 32768. Throughput: 0: 377.1. Samples: 7543. Policy #0 lag: (min: 0.0, avg: 0.0, max: 1.0)
102
+ [2025-07-13 11:32:59,255][02343] Avg episode reward: [(0, '4.484')]
103
+ [2025-07-13 11:33:02,707][06119] Updated weights for policy 0, policy_version 10 (0.0020)
104
+ [2025-07-13 11:33:04,252][02343] Fps is (10 sec: 2048.0, 60 sec: 1638.4, 300 sec: 1638.4). Total num frames: 40960. Throughput: 0: 418.6. Samples: 10465. Policy #0 lag: (min: 0.0, avg: 0.1, max: 1.0)
105
+ [2025-07-13 11:33:04,256][02343] Avg episode reward: [(0, '4.518')]
106
+ [2025-07-13 11:33:09,251][02343] Fps is (10 sec: 2048.0, 60 sec: 1774.9, 300 sec: 1774.9). Total num frames: 53248. Throughput: 0: 458.9. Samples: 13767. Policy #0 lag: (min: 0.0, avg: 0.1, max: 1.0)
107
+ [2025-07-13 11:33:09,255][02343] Avg episode reward: [(0, '4.456')]
108
+ [2025-07-13 11:33:14,251][02343] Fps is (10 sec: 2457.6, 60 sec: 1872.5, 300 sec: 1872.5). Total num frames: 65536. Throughput: 0: 442.7. Samples: 15493. Policy #0 lag: (min: 0.0, avg: 0.0, max: 1.0)
109
+ [2025-07-13 11:33:14,254][02343] Avg episode reward: [(0, '4.435')]
110
+ [2025-07-13 11:33:19,251][02343] Fps is (10 sec: 2048.0, 60 sec: 1843.2, 300 sec: 1843.2). Total num frames: 73728. Throughput: 0: 456.6. Samples: 18262. Policy #0 lag: (min: 0.0, avg: 0.0, max: 1.0)
111
+ [2025-07-13 11:33:19,255][02343] Avg episode reward: [(0, '4.358')]
112
+ [2025-07-13 11:33:22,039][06119] Updated weights for policy 0, policy_version 20 (0.0013)
113
+ [2025-07-13 11:33:24,251][02343] Fps is (10 sec: 2048.0, 60 sec: 1911.5, 300 sec: 1911.5). Total num frames: 86016. Throughput: 0: 482.6. Samples: 21718. Policy #0 lag: (min: 0.0, avg: 0.0, max: 1.0)
114
+ [2025-07-13 11:33:24,254][02343] Avg episode reward: [(0, '4.320')]
115
+ [2025-07-13 11:33:29,254][02343] Fps is (10 sec: 2047.9, 60 sec: 1884.1, 300 sec: 1884.1). Total num frames: 94208. Throughput: 0: 505.8. Samples: 23452. Policy #0 lag: (min: 0.0, avg: 0.0, max: 1.0)
116
+ [2025-07-13 11:33:29,255][02343] Avg episode reward: [(0, '4.360')]
117
+ [2025-07-13 11:33:34,251][02343] Fps is (10 sec: 2048.0, 60 sec: 1936.3, 300 sec: 1936.3). Total num frames: 106496. Throughput: 0: 513.3. Samples: 26154. Policy #0 lag: (min: 0.0, avg: 0.1, max: 1.0)
118
+ [2025-07-13 11:33:34,256][02343] Avg episode reward: [(0, '4.347')]
119
+ [2025-07-13 11:33:34,262][06110] Saving new best policy, reward=4.347!
120
+ [2025-07-13 11:33:39,251][02343] Fps is (10 sec: 2048.1, 60 sec: 1911.5, 300 sec: 1911.5). Total num frames: 114688. Throughput: 0: 525.0. Samples: 29551. Policy #0 lag: (min: 0.0, avg: 0.0, max: 1.0)
121
+ [2025-07-13 11:33:39,253][02343] Avg episode reward: [(0, '4.298')]
122
+ [2025-07-13 11:33:41,944][06119] Updated weights for policy 0, policy_version 30 (0.0014)
123
+ [2025-07-13 11:33:44,251][02343] Fps is (10 sec: 1638.4, 60 sec: 2048.0, 300 sec: 1890.5). Total num frames: 122880. Throughput: 0: 516.9. Samples: 30803. Policy #0 lag: (min: 0.0, avg: 0.1, max: 1.0)
124
+ [2025-07-13 11:33:44,255][02343] Avg episode reward: [(0, '4.267')]
125
+ [2025-07-13 11:33:49,251][02343] Fps is (10 sec: 2048.0, 60 sec: 2048.1, 300 sec: 1931.0). Total num frames: 135168. Throughput: 0: 522.6. Samples: 33984. Policy #0 lag: (min: 0.0, avg: 0.0, max: 1.0)
126
+ [2025-07-13 11:33:49,255][02343] Avg episode reward: [(0, '4.283')]
127
+ [2025-07-13 11:33:54,253][02343] Fps is (10 sec: 2457.2, 60 sec: 2116.2, 300 sec: 1966.0). Total num frames: 147456. Throughput: 0: 523.3. Samples: 37317. Policy #0 lag: (min: 0.0, avg: 0.0, max: 1.0)
128
+ [2025-07-13 11:33:54,254][02343] Avg episode reward: [(0, '4.434')]
129
+ [2025-07-13 11:33:54,261][06110] Saving new best policy, reward=4.434!
130
+ [2025-07-13 11:33:59,251][02343] Fps is (10 sec: 2048.0, 60 sec: 2048.0, 300 sec: 1945.6). Total num frames: 155648. Throughput: 0: 509.5. Samples: 38420. Policy #0 lag: (min: 0.0, avg: 0.0, max: 1.0)
131
+ [2025-07-13 11:33:59,258][02343] Avg episode reward: [(0, '4.585')]
132
+ [2025-07-13 11:33:59,264][06110] Saving new best policy, reward=4.585!
133
+ [2025-07-13 11:34:01,670][06119] Updated weights for policy 0, policy_version 40 (0.0013)
134
+ [2025-07-13 11:34:04,251][02343] Fps is (10 sec: 2048.3, 60 sec: 2116.3, 300 sec: 1975.7). Total num frames: 167936. Throughput: 0: 525.0. Samples: 41885. Policy #0 lag: (min: 0.0, avg: 0.1, max: 1.0)
135
+ [2025-07-13 11:34:04,255][02343] Avg episode reward: [(0, '4.766')]
136
+ [2025-07-13 11:34:04,263][06110] Saving new best policy, reward=4.766!
137
+ [2025-07-13 11:34:09,259][02343] Fps is (10 sec: 2046.4, 60 sec: 2047.7, 300 sec: 1956.8). Total num frames: 176128. Throughput: 0: 510.4. Samples: 44692. Policy #0 lag: (min: 0.0, avg: 0.0, max: 1.0)
138
+ [2025-07-13 11:34:09,261][02343] Avg episode reward: [(0, '4.719')]
139
+ [2025-07-13 11:34:14,251][02343] Fps is (10 sec: 2048.0, 60 sec: 2048.0, 300 sec: 1983.3). Total num frames: 188416. Throughput: 0: 507.6. Samples: 46295. Policy #0 lag: (min: 0.0, avg: 0.1, max: 1.0)
140
+ [2025-07-13 11:34:14,255][02343] Avg episode reward: [(0, '4.561')]
141
+ [2025-07-13 11:34:19,251][02343] Fps is (10 sec: 2459.5, 60 sec: 2116.3, 300 sec: 2007.0). Total num frames: 200704. Throughput: 0: 525.2. Samples: 49790. Policy #0 lag: (min: 0.0, avg: 0.0, max: 1.0)
142
+ [2025-07-13 11:34:19,255][02343] Avg episode reward: [(0, '4.330')]
143
+ [2025-07-13 11:34:21,466][06119] Updated weights for policy 0, policy_version 50 (0.0017)
144
+ [2025-07-13 11:34:24,257][02343] Fps is (10 sec: 2046.8, 60 sec: 2047.8, 300 sec: 1989.4). Total num frames: 208896. Throughput: 0: 509.2. Samples: 52469. Policy #0 lag: (min: 0.0, avg: 0.1, max: 1.0)
145
+ [2025-07-13 11:34:24,262][02343] Avg episode reward: [(0, '4.333')]
146
+ [2025-07-13 11:34:24,270][06110] Saving ./train_dir/vizdoom_exp/checkpoint_p0/checkpoint_000000051_208896.pth...
147
+ [2025-07-13 11:34:28,853][02343] Keyboard interrupt detected in the event loop EvtLoop [Runner_EvtLoop, process=main process 2343], exiting...
148
+ [2025-07-13 11:34:28,857][06110] Stopping Batcher_0...
149
+ [2025-07-13 11:34:28,857][06110] Loop batcher_evt_loop terminating...
150
+ [2025-07-13 11:34:28,859][06110] Saving ./train_dir/vizdoom_exp/checkpoint_p0/checkpoint_000000053_217088.pth...
151
+ [2025-07-13 11:34:28,899][06118] EvtLoop [rollout_proc1_evt_loop, process=rollout_proc1] unhandled exception in slot='advance_rollouts' connected to emitter=Emitter(object_id='InferenceWorker_p0-w0', signal_name='advance1'), args=(1, 0)
152
+ Traceback (most recent call last):
153
+ File "/usr/local/lib/python3.11/dist-packages/signal_slot/signal_slot.py", line 355, in _process_signal
154
+ slot_callable(*args)
155
+ File "/usr/local/lib/python3.11/dist-packages/sample_factory/algo/sampling/rollout_worker.py", line 241, in advance_rollouts
156
+ complete_rollouts, episodic_stats = runner.advance_rollouts(policy_id, self.timing)
157
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
158
+ File "/usr/local/lib/python3.11/dist-packages/sample_factory/algo/sampling/non_batched_sampling.py", line 634, in advance_rollouts
159
+ new_obs, rewards, terminated, truncated, infos = e.step(actions)
160
+ ^^^^^^^^^^^^^^^
161
+ File "/usr/local/lib/python3.11/dist-packages/gymnasium/core.py", line 461, in step
162
+ return self.env.step(action)
163
+ ^^^^^^^^^^^^^^^^^^^^^
164
+ File "/usr/local/lib/python3.11/dist-packages/sample_factory/algo/utils/make_env.py", line 129, in step
165
+ obs, rew, terminated, truncated, info = self.env.step(action)
166
+ ^^^^^^^^^^^^^^^^^^^^^
167
+ File "/usr/local/lib/python3.11/dist-packages/sample_factory/algo/utils/make_env.py", line 115, in step
168
+ obs, rew, terminated, truncated, info = self.env.step(action)
169
+ ^^^^^^^^^^^^^^^^^^^^^
170
+ File "/usr/local/lib/python3.11/dist-packages/sf_examples/vizdoom/doom/wrappers/scenario_wrappers/gathering_reward_shaping.py", line 33, in step
171
+ observation, reward, terminated, truncated, info = self.env.step(action)
172
+ ^^^^^^^^^^^^^^^^^^^^^
173
+ File "/usr/local/lib/python3.11/dist-packages/gymnasium/core.py", line 522, in step
174
+ observation, reward, terminated, truncated, info = self.env.step(action)
175
+ ^^^^^^^^^^^^^^^^^^^^^
176
+ File "/usr/local/lib/python3.11/dist-packages/sample_factory/envs/env_wrappers.py", line 86, in step
177
+ obs, reward, terminated, truncated, info = self.env.step(action)
178
+ ^^^^^^^^^^^^^^^^^^^^^
179
+ File "/usr/local/lib/python3.11/dist-packages/gymnasium/core.py", line 461, in step
180
+ return self.env.step(action)
181
+ ^^^^^^^^^^^^^^^^^^^^^
182
+ File "/usr/local/lib/python3.11/dist-packages/sf_examples/vizdoom/doom/wrappers/multiplayer_stats.py", line 54, in step
183
+ obs, reward, terminated, truncated, info = self.env.step(action)
184
+ ^^^^^^^^^^^^^^^^^^^^^
185
+ File "/usr/local/lib/python3.11/dist-packages/sf_examples/vizdoom/doom/doom_gym.py", line 452, in step
186
+ reward = self.game.make_action(actions_flattened, self.skip_frames)
187
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
188
+ vizdoom.vizdoom.SignalException: Signal SIGINT received. ViZDoom instance has been closed.
189
+ [2025-07-13 11:34:28,912][06118] Unhandled exception Signal SIGINT received. ViZDoom instance has been closed. in evt loop rollout_proc1_evt_loop
190
+ [2025-07-13 11:34:28,945][06119] Weights refcount: 2 0
191
+ [2025-07-13 11:34:28,949][06119] Stopping InferenceWorker_p0-w0...
192
+ [2025-07-13 11:34:28,949][06119] Loop inference_proc0-0_evt_loop terminating...
193
+ [2025-07-13 11:34:29,010][06110] Stopping LearnerWorker_p0...
194
+ [2025-07-13 11:34:29,011][06110] Loop learner_proc0_evt_loop terminating...
195
+ [2025-07-13 11:35:24,325][06939] Saving configuration to ./train_dir/vizdoom_exp/config.json...
196
+ [2025-07-13 11:35:24,327][06939] Rollout worker 0 uses device cpu
197
+ [2025-07-13 11:35:24,328][06939] Rollout worker 1 uses device cpu
198
+ [2025-07-13 11:35:24,454][06939] Using GPUs [0] for process 0 (actually maps to GPUs [0])
199
+ [2025-07-13 11:35:24,455][06939] InferenceWorker_p0-w0: min num requests: 1
200
+ [2025-07-13 11:35:24,467][06939] Starting all processes...
201
+ [2025-07-13 11:35:24,468][06939] Starting process learner_proc0
202
+ [2025-07-13 11:35:24,769][06939] Starting all processes...
203
+ [2025-07-13 11:35:24,783][06939] Starting process inference_proc0-0
204
+ [2025-07-13 11:35:24,784][06939] Starting process rollout_proc0
205
+ [2025-07-13 11:35:24,784][06939] Starting process rollout_proc1
206
+ [2025-07-13 11:35:30,789][07359] Using GPUs [0] for process 0 (actually maps to GPUs [0])
207
+ [2025-07-13 11:35:30,794][07359] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for learning process 0
208
+ [2025-07-13 11:35:30,857][07359] Num visible devices: 1
209
+ [2025-07-13 11:35:30,861][07359] Starting seed is not provided
210
+ [2025-07-13 11:35:30,861][07359] Using GPUs [0] for process 0 (actually maps to GPUs [0])
211
+ [2025-07-13 11:35:30,862][07359] Initializing actor-critic model on device cuda:0
212
+ [2025-07-13 11:35:30,863][07359] RunningMeanStd input shape: (3, 72, 128)
213
+ [2025-07-13 11:35:30,866][07359] RunningMeanStd input shape: (1,)
214
+ [2025-07-13 11:35:30,937][07359] ConvEncoder: input_channels=3
215
+ [2025-07-13 11:35:31,320][07368] Worker 1 uses CPU cores [1]
216
+ [2025-07-13 11:35:31,517][07359] Conv encoder output size: 512
217
+ [2025-07-13 11:35:31,517][07359] Policy head output size: 512
218
+ [2025-07-13 11:35:31,549][07359] Created Actor Critic model with architecture:
219
+ [2025-07-13 11:35:31,550][07359] ActorCriticSharedWeights(
220
+ (obs_normalizer): ObservationNormalizer(
221
+ (running_mean_std): RunningMeanStdDictInPlace(
222
+ (running_mean_std): ModuleDict(
223
+ (obs): RunningMeanStdInPlace()
224
+ )
225
+ )
226
+ )
227
+ (returns_normalizer): RecursiveScriptModule(original_name=RunningMeanStdInPlace)
228
+ (encoder): VizdoomEncoder(
229
+ (basic_encoder): ConvEncoder(
230
+ (enc): RecursiveScriptModule(
231
+ original_name=ConvEncoderImpl
232
+ (conv_head): RecursiveScriptModule(
233
+ original_name=Sequential
234
+ (0): RecursiveScriptModule(original_name=Conv2d)
235
+ (1): RecursiveScriptModule(original_name=ELU)
236
+ (2): RecursiveScriptModule(original_name=Conv2d)
237
+ (3): RecursiveScriptModule(original_name=ELU)
238
+ (4): RecursiveScriptModule(original_name=Conv2d)
239
+ (5): RecursiveScriptModule(original_name=ELU)
240
+ )
241
+ (mlp_layers): RecursiveScriptModule(
242
+ original_name=Sequential
243
+ (0): RecursiveScriptModule(original_name=Linear)
244
+ (1): RecursiveScriptModule(original_name=ELU)
245
+ )
246
+ )
247
+ )
248
+ )
249
+ (core): ModelCoreRNN(
250
+ (core): GRU(512, 512)
251
+ )
252
+ (decoder): MlpDecoder(
253
+ (mlp): Identity()
254
+ )
255
+ (critic_linear): Linear(in_features=512, out_features=1, bias=True)
256
+ (action_parameterization): ActionParameterizationDefault(
257
+ (distribution_linear): Linear(in_features=512, out_features=5, bias=True)
258
+ )
259
+ )
260
+ [2025-07-13 11:35:31,593][07367] Using GPUs [0] for process 0 (actually maps to GPUs [0])
261
+ [2025-07-13 11:35:31,594][07367] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for inference process 0
262
+ [2025-07-13 11:35:31,634][07367] Num visible devices: 1
263
+ [2025-07-13 11:35:31,636][07369] Worker 0 uses CPU cores [0]
264
+ [2025-07-13 11:35:31,742][07359] Using optimizer <class 'torch.optim.adam.Adam'>
265
+ [2025-07-13 11:35:33,126][07359] Loading state from checkpoint ./train_dir/vizdoom_exp/checkpoint_p0/checkpoint_000000053_217088.pth...
266
+ [2025-07-13 11:35:33,127][07359] Could not load from checkpoint, attempt 0
267
+ Traceback (most recent call last):
268
+ File "/usr/local/lib/python3.11/dist-packages/sample_factory/algo/learning/learner.py", line 281, in load_checkpoint
269
+ checkpoint_dict = torch.load(latest_checkpoint, map_location=device)
270
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
271
+ File "/usr/local/lib/python3.11/dist-packages/torch/serialization.py", line 1470, in load
272
+ raise pickle.UnpicklingError(_get_wo_message(str(e))) from None
273
+ _pickle.UnpicklingError: Weights only load failed. This file can still be loaded, to do so you have two options, do those steps only if you trust the source of the checkpoint.
274
+ (1) In PyTorch 2.6, we changed the default value of the `weights_only` argument in `torch.load` from `False` to `True`. Re-running `torch.load` with `weights_only` set to `False` will likely succeed, but it can result in arbitrary code execution. Do it only if you got the file from a trusted source.
275
+ (2) Alternatively, to load with `weights_only=True` please check the recommended steps in the following error message.
276
+ WeightsUnpickler error: Unsupported global: GLOBAL numpy.core.multiarray.scalar was not an allowed global by default. Please use `torch.serialization.add_safe_globals([scalar])` or the `torch.serialization.safe_globals([scalar])` context manager to allowlist this global if you trust this class/function.
277
+
278
+ Check the documentation of torch.load to learn more about types accepted by default with weights_only https://pytorch.org/docs/stable/generated/torch.load.html.
279
+ [2025-07-13 11:35:33,130][07359] Loading state from checkpoint ./train_dir/vizdoom_exp/checkpoint_p0/checkpoint_000000053_217088.pth...
280
+ [2025-07-13 11:35:33,131][07359] Could not load from checkpoint, attempt 1
281
+ Traceback (most recent call last):
282
+ File "/usr/local/lib/python3.11/dist-packages/sample_factory/algo/learning/learner.py", line 281, in load_checkpoint
283
+ checkpoint_dict = torch.load(latest_checkpoint, map_location=device)
284
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
285
+ File "/usr/local/lib/python3.11/dist-packages/torch/serialization.py", line 1470, in load
286
+ raise pickle.UnpicklingError(_get_wo_message(str(e))) from None
287
+ _pickle.UnpicklingError: Weights only load failed. This file can still be loaded, to do so you have two options, do those steps only if you trust the source of the checkpoint.
288
+ (1) In PyTorch 2.6, we changed the default value of the `weights_only` argument in `torch.load` from `False` to `True`. Re-running `torch.load` with `weights_only` set to `False` will likely succeed, but it can result in arbitrary code execution. Do it only if you got the file from a trusted source.
289
+ (2) Alternatively, to load with `weights_only=True` please check the recommended steps in the following error message.
290
+ WeightsUnpickler error: Unsupported global: GLOBAL numpy.core.multiarray.scalar was not an allowed global by default. Please use `torch.serialization.add_safe_globals([scalar])` or the `torch.serialization.safe_globals([scalar])` context manager to allowlist this global if you trust this class/function.
291
+
292
+ Check the documentation of torch.load to learn more about types accepted by default with weights_only https://pytorch.org/docs/stable/generated/torch.load.html.
293
+ [2025-07-13 11:35:33,132][07359] Loading state from checkpoint ./train_dir/vizdoom_exp/checkpoint_p0/checkpoint_000000053_217088.pth...
294
+ [2025-07-13 11:35:33,132][07359] Could not load from checkpoint, attempt 2
295
+ Traceback (most recent call last):
296
+ File "/usr/local/lib/python3.11/dist-packages/sample_factory/algo/learning/learner.py", line 281, in load_checkpoint
297
+ checkpoint_dict = torch.load(latest_checkpoint, map_location=device)
298
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
299
+ File "/usr/local/lib/python3.11/dist-packages/torch/serialization.py", line 1470, in load
300
+ raise pickle.UnpicklingError(_get_wo_message(str(e))) from None
301
+ _pickle.UnpicklingError: Weights only load failed. This file can still be loaded, to do so you have two options, do those steps only if you trust the source of the checkpoint.
302
+ (1) In PyTorch 2.6, we changed the default value of the `weights_only` argument in `torch.load` from `False` to `True`. Re-running `torch.load` with `weights_only` set to `False` will likely succeed, but it can result in arbitrary code execution. Do it only if you got the file from a trusted source.
303
+ (2) Alternatively, to load with `weights_only=True` please check the recommended steps in the following error message.
304
+ WeightsUnpickler error: Unsupported global: GLOBAL numpy.core.multiarray.scalar was not an allowed global by default. Please use `torch.serialization.add_safe_globals([scalar])` or the `torch.serialization.safe_globals([scalar])` context manager to allowlist this global if you trust this class/function.
305
+
306
+ Check the documentation of torch.load to learn more about types accepted by default with weights_only https://pytorch.org/docs/stable/generated/torch.load.html.
307
+ [2025-07-13 11:35:33,133][07359] Did not load from checkpoint, starting from scratch!
308
+ [2025-07-13 11:35:33,133][07359] Initialized policy 0 weights for model version 0
309
+ [2025-07-13 11:35:33,143][07359] LearnerWorker_p0 finished initialization!
310
+ [2025-07-13 11:35:33,143][07359] Using GPUs [0] for process 0 (actually maps to GPUs [0])
311
+ [2025-07-13 11:35:33,241][07367] RunningMeanStd input shape: (3, 72, 128)
312
+ [2025-07-13 11:35:33,243][07367] RunningMeanStd input shape: (1,)
313
+ [2025-07-13 11:35:33,253][07367] ConvEncoder: input_channels=3
314
+ [2025-07-13 11:35:33,370][07367] Conv encoder output size: 512
315
+ [2025-07-13 11:35:33,370][07367] Policy head output size: 512
316
+ [2025-07-13 11:35:33,403][06939] Inference worker 0-0 is ready!
317
+ [2025-07-13 11:35:33,404][06939] All inference workers are ready! Signal rollout workers to start!
318
+ [2025-07-13 11:35:33,457][07368] Doom resolution: 160x120, resize resolution: (128, 72)
319
+ [2025-07-13 11:35:33,456][07369] Doom resolution: 160x120, resize resolution: (128, 72)
320
+ [2025-07-13 11:35:33,844][07369] Decorrelating experience for 0 frames...
321
+ [2025-07-13 11:35:33,851][07368] Decorrelating experience for 0 frames...
322
+ [2025-07-13 11:35:34,276][07369] Decorrelating experience for 32 frames...
323
+ [2025-07-13 11:35:34,286][07368] Decorrelating experience for 32 frames...
324
+ [2025-07-13 11:35:35,802][06939] Fps is (10 sec: nan, 60 sec: nan, 300 sec: nan). Total num frames: 0. Throughput: 0: nan. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
325
+ [2025-07-13 11:35:35,811][06939] Avg episode reward: [(0, '3.216')]
326
+ [2025-07-13 11:35:40,802][06939] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 273.6. Samples: 1368. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
327
+ [2025-07-13 11:35:40,807][06939] Avg episode reward: [(0, '4.118')]
328
+ [2025-07-13 11:35:44,442][06939] Heartbeat connected on Batcher_0
329
+ [2025-07-13 11:35:44,446][06939] Heartbeat connected on LearnerWorker_p0
330
+ [2025-07-13 11:35:44,456][06939] Heartbeat connected on InferenceWorker_p0-w0
331
+ [2025-07-13 11:35:44,462][06939] Heartbeat connected on RolloutWorker_w0
332
+ [2025-07-13 11:35:44,473][06939] Heartbeat connected on RolloutWorker_w1
333
+ [2025-07-13 11:35:45,802][06939] Fps is (10 sec: 1638.4, 60 sec: 1638.4, 300 sec: 1638.4). Total num frames: 16384. Throughput: 0: 425.2. Samples: 4252. Policy #0 lag: (min: 0.0, avg: 0.1, max: 1.0)
334
+ [2025-07-13 11:35:45,803][06939] Avg episode reward: [(0, '4.094')]
335
+ [2025-07-13 11:35:50,802][06939] Fps is (10 sec: 2457.7, 60 sec: 1638.4, 300 sec: 1638.4). Total num frames: 24576. Throughput: 0: 400.2. Samples: 6003. Policy #0 lag: (min: 0.0, avg: 0.1, max: 1.0)
336
+ [2025-07-13 11:35:50,803][06939] Avg episode reward: [(0, '4.254')]
337
+ [2025-07-13 11:35:55,802][06939] Fps is (10 sec: 2048.0, 60 sec: 1843.2, 300 sec: 1843.2). Total num frames: 36864. Throughput: 0: 435.6. Samples: 8712. Policy #0 lag: (min: 0.0, avg: 0.1, max: 1.0)
338
+ [2025-07-13 11:35:55,806][06939] Avg episode reward: [(0, '4.370')]
339
+ [2025-07-13 11:35:57,194][07367] Updated weights for policy 0, policy_version 10 (0.0023)
340
+ [2025-07-13 11:36:00,802][06939] Fps is (10 sec: 2457.6, 60 sec: 1966.1, 300 sec: 1966.1). Total num frames: 49152. Throughput: 0: 488.8. Samples: 12220. Policy #0 lag: (min: 0.0, avg: 0.1, max: 1.0)
341
+ [2025-07-13 11:36:00,805][06939] Avg episode reward: [(0, '4.427')]
342
+ [2025-07-13 11:36:05,802][06939] Fps is (10 sec: 2048.0, 60 sec: 1911.5, 300 sec: 1911.5). Total num frames: 57344. Throughput: 0: 451.5. Samples: 13545. Policy #0 lag: (min: 0.0, avg: 0.1, max: 1.0)
343
+ [2025-07-13 11:36:05,803][06939] Avg episode reward: [(0, '4.483')]
344
+ [2025-07-13 11:36:10,802][06939] Fps is (10 sec: 1638.4, 60 sec: 1872.5, 300 sec: 1872.5). Total num frames: 65536. Throughput: 0: 475.7. Samples: 16650. Policy #0 lag: (min: 0.0, avg: 0.0, max: 1.0)
345
+ [2025-07-13 11:36:10,806][06939] Avg episode reward: [(0, '4.508')]
346
+ [2025-07-13 11:36:15,804][06939] Fps is (10 sec: 2047.6, 60 sec: 1945.5, 300 sec: 1945.5). Total num frames: 77824. Throughput: 0: 490.3. Samples: 19613. Policy #0 lag: (min: 0.0, avg: 0.0, max: 1.0)
347
+ [2025-07-13 11:36:15,807][06939] Avg episode reward: [(0, '4.467')]
348
+ [2025-07-13 11:36:18,157][07367] Updated weights for policy 0, policy_version 20 (0.0020)
349
+ [2025-07-13 11:36:20,802][06939] Fps is (10 sec: 2048.0, 60 sec: 1911.5, 300 sec: 1911.5). Total num frames: 86016. Throughput: 0: 459.3. Samples: 20670. Policy #0 lag: (min: 0.0, avg: 0.1, max: 1.0)
350
+ [2025-07-13 11:36:20,805][06939] Avg episode reward: [(0, '4.500')]
351
+ [2025-07-13 11:36:25,802][06939] Fps is (10 sec: 2048.5, 60 sec: 1966.1, 300 sec: 1966.1). Total num frames: 98304. Throughput: 0: 505.8. Samples: 24129. Policy #0 lag: (min: 0.0, avg: 0.1, max: 1.0)
352
+ [2025-07-13 11:36:25,805][06939] Avg episode reward: [(0, '4.592')]
353
+ [2025-07-13 11:36:30,806][06939] Fps is (10 sec: 2047.1, 60 sec: 1936.1, 300 sec: 1936.1). Total num frames: 106496. Throughput: 0: 505.5. Samples: 27002. Policy #0 lag: (min: 0.0, avg: 0.0, max: 1.0)
354
+ [2025-07-13 11:36:30,810][06939] Avg episode reward: [(0, '4.454')]
355
+ [2025-07-13 11:36:30,813][07359] Saving new best policy, reward=4.454!
356
+ [2025-07-13 11:36:35,802][06939] Fps is (10 sec: 2048.0, 60 sec: 1979.7, 300 sec: 1979.7). Total num frames: 118784. Throughput: 0: 499.9. Samples: 28498. Policy #0 lag: (min: 0.0, avg: 0.1, max: 1.0)
357
+ [2025-07-13 11:36:35,803][06939] Avg episode reward: [(0, '4.456')]
358
+ [2025-07-13 11:36:35,814][07359] Saving new best policy, reward=4.456!
359
+ [2025-07-13 11:36:37,443][07367] Updated weights for policy 0, policy_version 30 (0.0014)
360
+ [2025-07-13 11:36:40,802][06939] Fps is (10 sec: 2048.9, 60 sec: 2116.3, 300 sec: 1953.5). Total num frames: 126976. Throughput: 0: 515.4. Samples: 31905. Policy #0 lag: (min: 0.0, avg: 0.0, max: 1.0)
361
+ [2025-07-13 11:36:40,809][06939] Avg episode reward: [(0, '4.306')]
362
+ [2025-07-13 11:36:45,802][06939] Fps is (10 sec: 2048.0, 60 sec: 2048.0, 300 sec: 1989.5). Total num frames: 139264. Throughput: 0: 498.6. Samples: 34659. Policy #0 lag: (min: 0.0, avg: 0.1, max: 1.0)
363
+ [2025-07-13 11:36:45,805][06939] Avg episode reward: [(0, '4.352')]
364
+ [2025-07-13 11:36:50,803][06939] Fps is (10 sec: 2047.7, 60 sec: 2047.9, 300 sec: 1966.0). Total num frames: 147456. Throughput: 0: 507.2. Samples: 36371. Policy #0 lag: (min: 0.0, avg: 0.0, max: 1.0)
365
+ [2025-07-13 11:36:50,808][06939] Avg episode reward: [(0, '4.362')]
366
+ [2025-07-13 11:36:55,802][06939] Fps is (10 sec: 1638.3, 60 sec: 1979.7, 300 sec: 1945.6). Total num frames: 155648. Throughput: 0: 507.1. Samples: 39470. Policy #0 lag: (min: 0.0, avg: 0.1, max: 1.0)
367
+ [2025-07-13 11:36:55,810][06939] Avg episode reward: [(0, '4.446')]
368
+ [2025-07-13 11:36:58,610][07367] Updated weights for policy 0, policy_version 40 (0.0018)
369
+ [2025-07-13 11:37:00,802][06939] Fps is (10 sec: 2048.4, 60 sec: 1979.7, 300 sec: 1975.7). Total num frames: 167936. Throughput: 0: 498.2. Samples: 42031. Policy #0 lag: (min: 0.0, avg: 0.1, max: 1.0)
370
+ [2025-07-13 11:37:00,804][06939] Avg episode reward: [(0, '4.559')]
371
+ [2025-07-13 11:37:00,808][07359] Saving new best policy, reward=4.559!
372
+ [2025-07-13 11:37:05,805][06939] Fps is (10 sec: 2047.5, 60 sec: 1979.6, 300 sec: 1956.9). Total num frames: 176128. Throughput: 0: 511.3. Samples: 43682. Policy #0 lag: (min: 0.0, avg: 0.1, max: 1.0)
373
+ [2025-07-13 11:37:05,809][06939] Avg episode reward: [(0, '4.462')]
374
+ [2025-07-13 11:37:10,802][06939] Fps is (10 sec: 1638.4, 60 sec: 1979.7, 300 sec: 1940.2). Total num frames: 184320. Throughput: 0: 498.0. Samples: 46541. Policy #0 lag: (min: 0.0, avg: 0.0, max: 1.0)
375
+ [2025-07-13 11:37:10,806][06939] Avg episode reward: [(0, '4.346')]
376
+ [2025-07-13 11:37:15,802][06939] Fps is (10 sec: 2048.6, 60 sec: 1979.8, 300 sec: 1966.1). Total num frames: 196608. Throughput: 0: 504.4. Samples: 49699. Policy #0 lag: (min: 0.0, avg: 0.0, max: 1.0)
377
+ [2025-07-13 11:37:15,805][06939] Avg episode reward: [(0, '4.272')]
378
+ [2025-07-13 11:37:15,814][07359] Saving ./train_dir/vizdoom_exp/checkpoint_p0/checkpoint_000000048_196608.pth...
379
+ [2025-07-13 11:37:15,891][07359] Removing ./train_dir/vizdoom_exp/checkpoint_p0/checkpoint_000000048_196608.pth
380
+ [2025-07-13 11:37:18,217][07359] Saving ./train_dir/vizdoom_exp/checkpoint_p0/checkpoint_000000050_204800.pth...
381
+ [2025-07-13 11:37:18,218][07359] Stopping Batcher_0...
382
+ [2025-07-13 11:37:18,225][07359] Loop batcher_evt_loop terminating...
383
+ [2025-07-13 11:37:18,223][06939] Component Batcher_0 stopped!
384
+ [2025-07-13 11:37:18,232][07367] Updated weights for policy 0, policy_version 50 (0.0014)
385
+ [2025-07-13 11:37:18,266][07367] Weights refcount: 2 0
386
+ [2025-07-13 11:37:18,268][06939] Component InferenceWorker_p0-w0 stopped!
387
+ [2025-07-13 11:37:18,271][07367] Stopping InferenceWorker_p0-w0...
388
+ [2025-07-13 11:37:18,273][07367] Loop inference_proc0-0_evt_loop terminating...
389
+ [2025-07-13 11:37:18,298][07359] Removing ./train_dir/vizdoom_exp/checkpoint_p0/checkpoint_000000050_204800.pth
390
+ [2025-07-13 11:37:18,307][07359] Saving ./train_dir/vizdoom_exp/checkpoint_p0/checkpoint_000000050_204800.pth...
391
+ [2025-07-13 11:37:18,368][06939] Component RolloutWorker_w1 stopped!
392
+ [2025-07-13 11:37:18,371][07368] Stopping RolloutWorker_w1...
393
+ [2025-07-13 11:37:18,374][07368] Loop rollout_proc1_evt_loop terminating...
394
+ [2025-07-13 11:37:18,387][07359] Removing ./train_dir/vizdoom_exp/checkpoint_p0/checkpoint_000000050_204800.pth
395
+ [2025-07-13 11:37:18,396][07359] Stopping LearnerWorker_p0...
396
+ [2025-07-13 11:37:18,396][06939] Component LearnerWorker_p0 stopped!
397
+ [2025-07-13 11:37:18,396][07359] Loop learner_proc0_evt_loop terminating...
398
+ [2025-07-13 11:37:18,401][06939] Component RolloutWorker_w0 stopped!
399
+ [2025-07-13 11:37:18,401][07369] Stopping RolloutWorker_w0...
400
+ [2025-07-13 11:37:18,403][06939] Waiting for process learner_proc0 to stop...
401
+ [2025-07-13 11:37:18,404][07369] Loop rollout_proc0_evt_loop terminating...
402
+ [2025-07-13 11:37:19,546][06939] Waiting for process inference_proc0-0 to join...
403
+ [2025-07-13 11:37:19,549][06939] Waiting for process rollout_proc0 to join...
404
+ [2025-07-13 11:37:19,919][06939] Waiting for process rollout_proc1 to join...
405
+ [2025-07-13 11:37:19,921][06939] Batcher 0 profile tree view:
406
+ batching: 1.1778, releasing_batches: 0.0012
407
+ [2025-07-13 11:37:19,923][06939] InferenceWorker_p0-w0 profile tree view:
408
+ wait_policy: 0.0000
409
+ wait_policy_total: 15.3290
410
+ update_model: 1.2617
411
+ weight_update: 0.0014
412
+ one_step: 0.0035
413
+ handle_policy_step: 83.1065
414
+ deserialize: 1.7763, stack: 0.5898, obs_to_device_normalize: 20.5143, forward: 43.8813, send_messages: 2.3863
415
+ prepare_outputs: 10.0497
416
+ to_cpu: 6.0774
417
+ [2025-07-13 11:37:19,924][06939] Learner 0 profile tree view:
418
+ misc: 0.0002, prepare_batch: 1.8202
419
+ train: 5.3572
420
+ epoch_init: 0.0002, minibatch_init: 0.0003, losses_postprocess: 0.0286, kl_divergence: 0.0488, after_optimizer: 1.7859
421
+ calculate_losses: 1.6856
422
+ losses_init: 0.0002, forward_head: 0.4117, bptt_initial: 0.9225, tail: 0.0585, advantages_returns: 0.0108, losses: 0.1807
423
+ bptt: 0.0892
424
+ bptt_forward_core: 0.0844
425
+ update: 1.7855
426
+ clip: 0.0915
427
+ [2025-07-13 11:37:19,925][06939] RolloutWorker_w0 profile tree view:
428
+ wait_for_trajectories: 0.0519, enqueue_policy_requests: 12.9735, env_step: 38.3118, overhead: 1.9176, complete_rollouts: 0.5191
429
+ save_policy_outputs: 2.2653
430
+ split_output_tensors: 0.9149
431
+ [2025-07-13 11:37:19,926][06939] RolloutWorker_w1 profile tree view:
432
+ wait_for_trajectories: 0.0577, enqueue_policy_requests: 12.6762, env_step: 38.4079, overhead: 1.8928, complete_rollouts: 0.3994
433
+ save_policy_outputs: 2.4013
434
+ split_output_tensors: 0.9375
435
+ [2025-07-13 11:37:19,927][06939] Loop Runner_EvtLoop terminating...
436
+ [2025-07-13 11:37:19,930][06939] Runner profile tree view:
437
+ main_loop: 115.4634
438
+ [2025-07-13 11:37:19,931][06939] Collected {0: 204800}, FPS: 1773.7
439
+ [2025-07-13 11:37:19,942][06939] Environment doom_basic already registered, overwriting...
440
+ [2025-07-13 11:37:19,943][06939] Environment doom_two_colors_easy already registered, overwriting...
441
+ [2025-07-13 11:37:19,944][06939] Environment doom_two_colors_hard already registered, overwriting...
442
+ [2025-07-13 11:37:19,945][06939] Environment doom_dm already registered, overwriting...
443
+ [2025-07-13 11:37:19,946][06939] Environment doom_dwango5 already registered, overwriting...
444
+ [2025-07-13 11:37:19,947][06939] Environment doom_my_way_home_flat_actions already registered, overwriting...
445
+ [2025-07-13 11:37:19,947][06939] Environment doom_defend_the_center_flat_actions already registered, overwriting...
446
+ [2025-07-13 11:37:19,948][06939] Environment doom_my_way_home already registered, overwriting...
447
+ [2025-07-13 11:37:19,952][06939] Environment doom_deadly_corridor already registered, overwriting...
448
+ [2025-07-13 11:37:19,953][06939] Environment doom_defend_the_center already registered, overwriting...
449
+ [2025-07-13 11:37:19,954][06939] Environment doom_defend_the_line already registered, overwriting...
450
+ [2025-07-13 11:37:19,955][06939] Environment doom_health_gathering already registered, overwriting...
451
+ [2025-07-13 11:37:19,956][06939] Environment doom_health_gathering_supreme already registered, overwriting...
452
+ [2025-07-13 11:37:19,956][06939] Environment doom_battle already registered, overwriting...
453
+ [2025-07-13 11:37:19,957][06939] Environment doom_battle2 already registered, overwriting...
454
+ [2025-07-13 11:37:19,958][06939] Environment doom_duel_bots already registered, overwriting...
455
+ [2025-07-13 11:37:19,959][06939] Environment doom_deathmatch_bots already registered, overwriting...
456
+ [2025-07-13 11:37:19,959][06939] Environment doom_duel already registered, overwriting...
457
+ [2025-07-13 11:37:19,960][06939] Environment doom_deathmatch_full already registered, overwriting...
458
+ [2025-07-13 11:37:19,961][06939] Environment doom_benchmark already registered, overwriting...
459
+ [2025-07-13 11:37:19,962][06939] register_encoder_factory: <function make_vizdoom_encoder at 0x7f16fee9a340>
460
+ [2025-07-13 11:39:55,913][06939] Loading existing experiment configuration from ./train_dir/vizdoom_exp/config.json
461
+ [2025-07-13 11:39:55,914][06939] Adding new argument 'no_render'=False that is not in the saved config file!
462
+ [2025-07-13 11:39:55,915][06939] Adding new argument 'save_video'=True that is not in the saved config file!
463
+ [2025-07-13 11:39:55,916][06939] Adding new argument 'video_frames'=1000000000.0 that is not in the saved config file!
464
+ [2025-07-13 11:39:55,917][06939] Adding new argument 'video_name'=None that is not in the saved config file!
465
+ [2025-07-13 11:39:55,917][06939] Adding new argument 'max_num_frames'=1000000000.0 that is not in the saved config file!
466
+ [2025-07-13 11:39:55,918][06939] Adding new argument 'max_num_episodes'=5 that is not in the saved config file!
467
+ [2025-07-13 11:39:55,919][06939] Adding new argument 'push_to_hub'=False that is not in the saved config file!
468
+ [2025-07-13 11:39:55,920][06939] Adding new argument 'hf_repository'=None that is not in the saved config file!
469
+ [2025-07-13 11:39:55,921][06939] Adding new argument 'policy_index'=0 that is not in the saved config file!
470
+ [2025-07-13 11:39:55,922][06939] Adding new argument 'eval_deterministic'=False that is not in the saved config file!
471
+ [2025-07-13 11:39:55,922][06939] Adding new argument 'train_script'=None that is not in the saved config file!
472
+ [2025-07-13 11:39:55,923][06939] Adding new argument 'enjoy_script'=None that is not in the saved config file!
473
+ [2025-07-13 11:39:55,924][06939] Using frameskip 1 and render_action_repeat=4 for evaluation
474
+ [2025-07-13 11:39:55,953][06939] Doom resolution: 160x120, resize resolution: (128, 72)
475
+ [2025-07-13 11:39:55,957][06939] RunningMeanStd input shape: (3, 72, 128)
476
+ [2025-07-13 11:39:55,959][06939] RunningMeanStd input shape: (1,)
477
+ [2025-07-13 11:39:55,974][06939] ConvEncoder: input_channels=3
478
+ [2025-07-13 11:39:56,088][06939] Conv encoder output size: 512
479
+ [2025-07-13 11:39:56,090][06939] Policy head output size: 512
480
+ [2025-07-13 11:39:56,298][06939] Loading state from checkpoint ./train_dir/vizdoom_exp/checkpoint_p0/checkpoint_000000053_217088.pth...
481
+ [2025-07-13 11:39:56,305][06939] Could not load from checkpoint, attempt 0
482
+ Traceback (most recent call last):
483
+ File "/usr/local/lib/python3.11/dist-packages/sample_factory/algo/learning/learner.py", line 281, in load_checkpoint
484
+ checkpoint_dict = torch.load(latest_checkpoint, map_location=device)
485
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
486
+ File "/usr/local/lib/python3.11/dist-packages/torch/serialization.py", line 1470, in load
487
+ raise pickle.UnpicklingError(_get_wo_message(str(e))) from None
488
+ _pickle.UnpicklingError: Weights only load failed. This file can still be loaded, to do so you have two options, do those steps only if you trust the source of the checkpoint.
489
+ (1) In PyTorch 2.6, we changed the default value of the `weights_only` argument in `torch.load` from `False` to `True`. Re-running `torch.load` with `weights_only` set to `False` will likely succeed, but it can result in arbitrary code execution. Do it only if you got the file from a trusted source.
490
+ (2) Alternatively, to load with `weights_only=True` please check the recommended steps in the following error message.
491
+ WeightsUnpickler error: Unsupported global: GLOBAL numpy.core.multiarray.scalar was not an allowed global by default. Please use `torch.serialization.add_safe_globals([scalar])` or the `torch.serialization.safe_globals([scalar])` context manager to allowlist this global if you trust this class/function.
492
+
493
+ Check the documentation of torch.load to learn more about types accepted by default with weights_only https://pytorch.org/docs/stable/generated/torch.load.html.
494
+ [2025-07-13 11:39:56,308][06939] Loading state from checkpoint ./train_dir/vizdoom_exp/checkpoint_p0/checkpoint_000000053_217088.pth...
495
+ [2025-07-13 11:39:56,310][06939] Could not load from checkpoint, attempt 1
496
+ Traceback (most recent call last):
497
+ File "/usr/local/lib/python3.11/dist-packages/sample_factory/algo/learning/learner.py", line 281, in load_checkpoint
498
+ checkpoint_dict = torch.load(latest_checkpoint, map_location=device)
499
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
500
+ File "/usr/local/lib/python3.11/dist-packages/torch/serialization.py", line 1470, in load
501
+ raise pickle.UnpicklingError(_get_wo_message(str(e))) from None
502
+ _pickle.UnpicklingError: Weights only load failed. This file can still be loaded, to do so you have two options, do those steps only if you trust the source of the checkpoint.
503
+ (1) In PyTorch 2.6, we changed the default value of the `weights_only` argument in `torch.load` from `False` to `True`. Re-running `torch.load` with `weights_only` set to `False` will likely succeed, but it can result in arbitrary code execution. Do it only if you got the file from a trusted source.
504
+ (2) Alternatively, to load with `weights_only=True` please check the recommended steps in the following error message.
505
+ WeightsUnpickler error: Unsupported global: GLOBAL numpy.core.multiarray.scalar was not an allowed global by default. Please use `torch.serialization.add_safe_globals([scalar])` or the `torch.serialization.safe_globals([scalar])` context manager to allowlist this global if you trust this class/function.
506
+
507
+ Check the documentation of torch.load to learn more about types accepted by default with weights_only https://pytorch.org/docs/stable/generated/torch.load.html.
508
+ [2025-07-13 11:39:56,311][06939] Loading state from checkpoint ./train_dir/vizdoom_exp/checkpoint_p0/checkpoint_000000053_217088.pth...
509
+ [2025-07-13 11:39:56,313][06939] Could not load from checkpoint, attempt 2
510
+ Traceback (most recent call last):
511
+ File "/usr/local/lib/python3.11/dist-packages/sample_factory/algo/learning/learner.py", line 281, in load_checkpoint
512
+ checkpoint_dict = torch.load(latest_checkpoint, map_location=device)
513
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
514
+ File "/usr/local/lib/python3.11/dist-packages/torch/serialization.py", line 1470, in load
515
+ raise pickle.UnpicklingError(_get_wo_message(str(e))) from None
516
+ _pickle.UnpicklingError: Weights only load failed. This file can still be loaded, to do so you have two options, do those steps only if you trust the source of the checkpoint.
517
+ (1) In PyTorch 2.6, we changed the default value of the `weights_only` argument in `torch.load` from `False` to `True`. Re-running `torch.load` with `weights_only` set to `False` will likely succeed, but it can result in arbitrary code execution. Do it only if you got the file from a trusted source.
+ (2) Alternatively, to load with `weights_only=True` please check the recommended steps in the following error message.
+ WeightsUnpickler error: Unsupported global: GLOBAL numpy.core.multiarray.scalar was not an allowed global by default. Please use `torch.serialization.add_safe_globals([scalar])` or the `torch.serialization.safe_globals([scalar])` context manager to allowlist this global if you trust this class/function.
+
+ Check the documentation of torch.load to learn more about types accepted by default with weights_only https://pytorch.org/docs/stable/generated/torch.load.html.
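The three failed load attempts above share one root cause: PyTorch 2.6 changed the default of `weights_only` in `torch.load` from `False` to `True`, and this checkpoint pickles `numpy.core.multiarray.scalar`, which is not on the default allowlist. A minimal sketch of the two remediations the error message itself names follows; the path is copied from the log, and both options assume you trust the checkpoint.

```python
# Minimal sketch of the two fixes named in the error message above.
# Assumes the checkpoint is trusted; the path is copied from this log.
import numpy as np
import torch

ckpt_path = "./train_dir/vizdoom_exp/checkpoint_p0/checkpoint_000000053_217088.pth"

# Option 1: opt out of the new PyTorch 2.6 default. This executes arbitrary
# pickle code, so only use it for checkpoints from a trusted source.
checkpoint = torch.load(ckpt_path, map_location="cpu", weights_only=False)

# Option 2: keep weights_only=True and allowlist only the offending global
# named in the error. Under numpy 2.x the numpy.core path still resolves
# through the backwards-compatibility shim (with a DeprecationWarning).
with torch.serialization.safe_globals([np.core.multiarray.scalar]):
    checkpoint = torch.load(ckpt_path, map_location="cpu", weights_only=True)
```

Either change would let `load_checkpoint` in Sample-Factory's `learner.py` succeed; pinning `torch<2.6` sidesteps the new default entirely.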
+ [2025-07-13 11:41:57,536][06939] Loading existing experiment configuration from ./train_dir/vizdoom_exp/config.json
+ [2025-07-13 11:41:57,537][06939] Adding new argument 'no_render'=False that is not in the saved config file!
+ [2025-07-13 11:41:57,539][06939] Adding new argument 'save_video'=True that is not in the saved config file!
+ [2025-07-13 11:41:57,543][06939] Adding new argument 'video_frames'=1000000000.0 that is not in the saved config file!
+ [2025-07-13 11:41:57,547][06939] Adding new argument 'video_name'=None that is not in the saved config file!
+ [2025-07-13 11:41:57,548][06939] Adding new argument 'max_num_frames'=1000000000.0 that is not in the saved config file!
+ [2025-07-13 11:41:57,549][06939] Adding new argument 'max_num_episodes'=5 that is not in the saved config file!
+ [2025-07-13 11:41:57,550][06939] Adding new argument 'push_to_hub'=False that is not in the saved config file!
+ [2025-07-13 11:41:57,551][06939] Adding new argument 'hf_repository'=None that is not in the saved config file!
+ [2025-07-13 11:41:57,552][06939] Adding new argument 'policy_index'=0 that is not in the saved config file!
+ [2025-07-13 11:41:57,553][06939] Adding new argument 'eval_deterministic'=False that is not in the saved config file!
+ [2025-07-13 11:41:57,554][06939] Adding new argument 'train_script'=None that is not in the saved config file!
+ [2025-07-13 11:41:57,555][06939] Adding new argument 'enjoy_script'=None that is not in the saved config file!
+ [2025-07-13 11:41:57,556][06939] Using frameskip 1 and render_action_repeat=4 for evaluation
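The "Adding new argument" lines above (and the "Overriding arg" lines in the later runs) show how evaluation reconciles configuration: values saved in `config.json` at training time are loaded first, command-line flags replace any that conflict, and evaluation-only flags that did not exist at training time are appended. A rough sketch of that merge behavior, for illustration only, not Sample-Factory's actual implementation:

```python
# Illustrative sketch of the config-merge behavior these log lines describe;
# NOT Sample-Factory's actual code, just the same three cases it reports.
import json

def merge_eval_config(saved_cfg_path: str, cli_args: dict) -> dict:
    with open(saved_cfg_path) as f:
        cfg = json.load(f)  # "Loading existing experiment configuration ..."
    for name, value in cli_args.items():
        if name not in cfg:
            print(f"Adding new argument {name!r}={value} that is not in the saved config file!")
        elif cfg[name] != value:
            print(f"Overriding arg {name!r} with value {value} passed from command line")
        cfg[name] = value
    return cfg

# e.g. with some of the flags visible in this log:
# merge_eval_config("./train_dir/vizdoom_exp/config.json",
#                   {"no_render": False, "save_video": True, "max_num_episodes": 5})
```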
+ [2025-07-13 11:41:57,602][06939] RunningMeanStd input shape: (3, 72, 128)
+ [2025-07-13 11:41:57,603][06939] RunningMeanStd input shape: (1,)
+ [2025-07-13 11:41:57,618][06939] ConvEncoder: input_channels=3
+ [2025-07-13 11:41:57,675][06939] Conv encoder output size: 512
+ [2025-07-13 11:41:57,676][06939] Policy head output size: 512
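The model-construction lines trace the actor-critic's input path: running mean/std normalizers are created for the (3, 72, 128) image observations and the scalar returns, a convolutional encoder maps each frame to a 512-dimensional embedding, and the policy head likewise outputs 512 units. A rough PyTorch sketch of that shape flow follows; only the input shape and the two 512s come from the log, and the specific conv stack is an assumption, not Sample-Factory's exact architecture.

```python
# Rough sketch of the shape flow the log lines above describe. Only the
# (3, 72, 128) input and the 512-dim encoder/head sizes come from the log;
# the particular conv stack is an assumption.
import torch
import torch.nn as nn

class ConvEncoderSketch(nn.Module):
    def __init__(self, input_channels: int = 3, out_size: int = 512):
        super().__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(input_channels, 32, kernel_size=8, stride=4), nn.ELU(),
            nn.Conv2d(32, 64, kernel_size=4, stride=2), nn.ELU(),
            nn.Conv2d(64, 128, kernel_size=3, stride=2), nn.ELU(),
            nn.Flatten(),
        )
        with torch.no_grad():  # infer the flattened size from a dummy frame
            n_flat = self.conv(torch.zeros(1, input_channels, 72, 128)).shape[1]
        self.fc = nn.Linear(n_flat, out_size)      # "Conv encoder output size: 512"
        self.head = nn.Linear(out_size, out_size)  # "Policy head output size: 512"

    def forward(self, obs: torch.Tensor) -> torch.Tensor:
        return self.head(torch.relu(self.fc(self.conv(obs))))

frames = torch.zeros(4, 3, 72, 128)       # a batch of normalized observations
print(ConvEncoderSketch()(frames).shape)  # torch.Size([4, 512])
```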
+ [2025-07-13 11:42:44,481][06939] Loading existing experiment configuration from ./train_dir/vizdoom_exp/config.json
+ [2025-07-13 11:42:44,483][06939] Adding new argument 'no_render'=False that is not in the saved config file!
+ [2025-07-13 11:42:44,484][06939] Adding new argument 'save_video'=True that is not in the saved config file!
+ [2025-07-13 11:42:44,486][06939] Adding new argument 'video_frames'=1000000000.0 that is not in the saved config file!
+ [2025-07-13 11:42:44,489][06939] Adding new argument 'video_name'=None that is not in the saved config file!
+ [2025-07-13 11:42:44,489][06939] Adding new argument 'max_num_frames'=1000000000.0 that is not in the saved config file!
+ [2025-07-13 11:42:44,491][06939] Adding new argument 'max_num_episodes'=5 that is not in the saved config file!
+ [2025-07-13 11:42:44,492][06939] Adding new argument 'push_to_hub'=False that is not in the saved config file!
+ [2025-07-13 11:42:44,492][06939] Adding new argument 'hf_repository'=None that is not in the saved config file!
+ [2025-07-13 11:42:44,493][06939] Adding new argument 'policy_index'=0 that is not in the saved config file!
+ [2025-07-13 11:42:44,494][06939] Adding new argument 'eval_deterministic'=False that is not in the saved config file!
+ [2025-07-13 11:42:44,494][06939] Adding new argument 'train_script'=None that is not in the saved config file!
+ [2025-07-13 11:42:44,499][06939] Adding new argument 'enjoy_script'=None that is not in the saved config file!
+ [2025-07-13 11:42:44,501][06939] Using frameskip 1 and render_action_repeat=4 for evaluation
+ [2025-07-13 11:42:44,558][06939] RunningMeanStd input shape: (3, 72, 128)
+ [2025-07-13 11:42:44,560][06939] RunningMeanStd input shape: (1,)
+ [2025-07-13 11:42:44,577][06939] ConvEncoder: input_channels=3
+ [2025-07-13 11:42:44,631][06939] Conv encoder output size: 512
+ [2025-07-13 11:42:44,633][06939] Policy head output size: 512
+ [2025-07-13 11:43:21,556][06939] Loading existing experiment configuration from ./train_dir/vizdoom_exp/config.json
+ [2025-07-13 11:43:21,558][06939] Adding new argument 'no_render'=True that is not in the saved config file!
+ [2025-07-13 11:43:21,559][06939] Adding new argument 'save_video'=True that is not in the saved config file!
+ [2025-07-13 11:43:21,560][06939] Adding new argument 'video_frames'=1000000000.0 that is not in the saved config file!
+ [2025-07-13 11:43:21,562][06939] Adding new argument 'video_name'=None that is not in the saved config file!
+ [2025-07-13 11:43:21,563][06939] Adding new argument 'max_num_frames'=1000000000.0 that is not in the saved config file!
+ [2025-07-13 11:43:21,564][06939] Adding new argument 'max_num_episodes'=5 that is not in the saved config file!
+ [2025-07-13 11:43:21,565][06939] Adding new argument 'push_to_hub'=False that is not in the saved config file!
+ [2025-07-13 11:43:21,567][06939] Adding new argument 'hf_repository'=None that is not in the saved config file!
+ [2025-07-13 11:43:21,568][06939] Adding new argument 'policy_index'=0 that is not in the saved config file!
+ [2025-07-13 11:43:21,569][06939] Adding new argument 'eval_deterministic'=False that is not in the saved config file!
+ [2025-07-13 11:43:21,570][06939] Adding new argument 'train_script'=None that is not in the saved config file!
+ [2025-07-13 11:43:21,571][06939] Adding new argument 'enjoy_script'=None that is not in the saved config file!
+ [2025-07-13 11:43:21,572][06939] Using frameskip 1 and render_action_repeat=4 for evaluation
+ [2025-07-13 11:43:21,603][06939] RunningMeanStd input shape: (3, 72, 128)
+ [2025-07-13 11:43:21,604][06939] RunningMeanStd input shape: (1,)
+ [2025-07-13 11:43:21,615][06939] ConvEncoder: input_channels=3
+ [2025-07-13 11:43:21,654][06939] Conv encoder output size: 512
+ [2025-07-13 11:43:21,657][06939] Policy head output size: 512
+ [2025-07-13 11:43:22,337][06939] Num frames 100...
+ [2025-07-13 11:43:22,472][06939] Num frames 200...
+ [2025-07-13 11:43:22,604][06939] Num frames 300...
+ [2025-07-13 11:43:22,737][06939] Num frames 400...
+ [2025-07-13 11:43:22,814][06939] Avg episode rewards: #0: 5.160, true rewards: #0: 4.160
+ [2025-07-13 11:43:22,815][06939] Avg episode reward: 5.160, avg true_objective: 4.160
+ [2025-07-13 11:43:22,936][06939] Num frames 500...
+ [2025-07-13 11:43:23,072][06939] Num frames 600...
+ [2025-07-13 11:43:23,202][06939] Num frames 700...
+ [2025-07-13 11:43:23,349][06939] Avg episode rewards: #0: 4.840, true rewards: #0: 3.840
+ [2025-07-13 11:43:23,350][06939] Avg episode reward: 4.840, avg true_objective: 3.840
+ [2025-07-13 11:43:23,395][06939] Num frames 800...
+ [2025-07-13 11:43:23,523][06939] Num frames 900...
+ [2025-07-13 11:43:23,655][06939] Num frames 1000...
+ [2025-07-13 11:43:23,790][06939] Num frames 1100...
+ [2025-07-13 11:43:23,911][06939] Avg episode rewards: #0: 4.507, true rewards: #0: 3.840
+ [2025-07-13 11:43:23,912][06939] Avg episode reward: 4.507, avg true_objective: 3.840
+ [2025-07-13 11:43:23,984][06939] Num frames 1200...
+ [2025-07-13 11:43:24,109][06939] Num frames 1300...
+ [2025-07-13 11:43:24,240][06939] Num frames 1400...
+ [2025-07-13 11:43:24,368][06939] Num frames 1500...
+ [2025-07-13 11:43:24,473][06939] Avg episode rewards: #0: 4.340, true rewards: #0: 3.840
+ [2025-07-13 11:43:24,474][06939] Avg episode reward: 4.340, avg true_objective: 3.840
+ [2025-07-13 11:43:24,569][06939] Num frames 1600...
+ [2025-07-13 11:43:24,702][06939] Num frames 1700...
+ [2025-07-13 11:43:24,892][06939] Avg episode rewards: #0: 3.992, true rewards: #0: 3.592
+ [2025-07-13 11:43:24,893][06939] Avg episode reward: 3.992, avg true_objective: 3.592
+ [2025-07-13 11:43:36,753][06939] Replay video saved to ./train_dir/vizdoom_exp/replay.mp4!
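The "Avg episode rewards" lines in the run above appear to be running means over the episodes finished so far, which is why they drift as weaker episodes come in, with "true rewards" reporting the raw objective separately from the (possibly shaped) episode reward. The printed values are self-consistent with a running mean, as a quick back-calculation shows:

```python
# Back-calculating implied per-episode scores from the running means printed
# above (values copied from this log; episodes 2 and 3 are not printed directly).
ep1 = 5.160
ep2 = 2 * 4.840 - ep1          # -> 4.520
ep3 = 3 * 4.507 - ep1 - ep2    # -> ~3.841
print(round(ep2, 3), round(ep3, 3))
```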
+ [2025-07-13 11:49:27,400][06939] Loading existing experiment configuration from ./train_dir/vizdoom_exp/config.json
+ [2025-07-13 11:49:27,401][06939] Overriding arg 'num_workers' with value 1 passed from command line
+ [2025-07-13 11:49:27,402][06939] Adding new argument 'no_render'=True that is not in the saved config file!
+ [2025-07-13 11:49:27,403][06939] Adding new argument 'save_video'=True that is not in the saved config file!
+ [2025-07-13 11:49:27,404][06939] Adding new argument 'video_frames'=1000000000.0 that is not in the saved config file!
+ [2025-07-13 11:49:27,405][06939] Adding new argument 'video_name'=None that is not in the saved config file!
+ [2025-07-13 11:49:27,406][06939] Adding new argument 'max_num_frames'=1600 that is not in the saved config file!
+ [2025-07-13 11:49:27,407][06939] Adding new argument 'max_num_episodes'=5 that is not in the saved config file!
+ [2025-07-13 11:49:27,408][06939] Adding new argument 'push_to_hub'=True that is not in the saved config file!
+ [2025-07-13 11:49:27,409][06939] Adding new argument 'hf_repository'='lokeessshhhh/rl_vizdoom_health_gathering' that is not in the saved config file!
+ [2025-07-13 11:49:27,410][06939] Adding new argument 'policy_index'=0 that is not in the saved config file!
+ [2025-07-13 11:49:27,412][06939] Adding new argument 'eval_deterministic'=False that is not in the saved config file!
+ [2025-07-13 11:49:27,413][06939] Adding new argument 'train_script'=None that is not in the saved config file!
+ [2025-07-13 11:49:27,414][06939] Adding new argument 'enjoy_script'=None that is not in the saved config file!
+ [2025-07-13 11:49:27,417][06939] Using frameskip 1 and render_action_repeat=4 for evaluation
+ [2025-07-13 11:49:27,440][06939] RunningMeanStd input shape: (3, 72, 128)
+ [2025-07-13 11:49:27,442][06939] RunningMeanStd input shape: (1,)
+ [2025-07-13 11:49:27,452][06939] ConvEncoder: input_channels=3
+ [2025-07-13 11:49:27,490][06939] Conv encoder output size: 512
+ [2025-07-13 11:49:27,491][06939] Policy head output size: 512
+ [2025-07-13 11:49:27,932][06939] Num frames 100...
+ [2025-07-13 11:49:28,063][06939] Num frames 200...
+ [2025-07-13 11:49:28,194][06939] Num frames 300...
+ [2025-07-13 11:49:28,322][06939] Num frames 400...
+ [2025-07-13 11:49:28,439][06939] Avg episode rewards: #0: 5.480, true rewards: #0: 4.480
+ [2025-07-13 11:49:28,441][06939] Avg episode reward: 5.480, avg true_objective: 4.480
+ [2025-07-13 11:49:28,510][06939] Num frames 500...
+ [2025-07-13 11:49:28,641][06939] Num frames 600...
+ [2025-07-13 11:49:28,780][06939] Num frames 700...
+ [2025-07-13 11:49:28,911][06939] Num frames 800...
+ [2025-07-13 11:49:29,008][06939] Avg episode rewards: #0: 4.660, true rewards: #0: 4.160
+ [2025-07-13 11:49:29,009][06939] Avg episode reward: 4.660, avg true_objective: 4.160
+ [2025-07-13 11:49:29,107][06939] Num frames 900...
+ [2025-07-13 11:49:29,241][06939] Num frames 1000...
+ [2025-07-13 11:49:29,372][06939] Num frames 1100...
+ [2025-07-13 11:49:29,503][06939] Num frames 1200...
+ [2025-07-13 11:49:29,580][06939] Avg episode rewards: #0: 4.387, true rewards: #0: 4.053
+ [2025-07-13 11:49:29,581][06939] Avg episode reward: 4.387, avg true_objective: 4.053
+ [2025-07-13 11:49:29,694][06939] Num frames 1300...
+ [2025-07-13 11:49:29,843][06939] Num frames 1400...
+ [2025-07-13 11:49:29,979][06939] Num frames 1500...
+ [2025-07-13 11:49:30,108][06939] Num frames 1600...
+ [2025-07-13 11:49:39,419][06939] Replay video saved to ./train_dir/vizdoom_exp/replay.mp4!
+ [2025-07-13 11:49:50,742][06939] The model has been pushed to https://huggingface.co/lokeessshhhh/rl_vizdoom_health_gathering
+ [2025-07-13 11:56:39,097][06939] Loading existing experiment configuration from ./train_dir/vizdoom_exp/config.json
+ [2025-07-13 11:56:39,099][06939] Overriding arg 'num_workers' with value 1 passed from command line
+ [2025-07-13 11:56:39,101][06939] Adding new argument 'no_render'=True that is not in the saved config file!
+ [2025-07-13 11:56:39,102][06939] Adding new argument 'save_video'=True that is not in the saved config file!
+ [2025-07-13 11:56:39,103][06939] Adding new argument 'video_frames'=1000000000.0 that is not in the saved config file!
+ [2025-07-13 11:56:39,105][06939] Adding new argument 'video_name'=None that is not in the saved config file!
+ [2025-07-13 11:56:39,105][06939] Adding new argument 'max_num_frames'=1600 that is not in the saved config file!
+ [2025-07-13 11:56:39,108][06939] Adding new argument 'max_num_episodes'=5 that is not in the saved config file!
+ [2025-07-13 11:56:39,109][06939] Adding new argument 'push_to_hub'=True that is not in the saved config file!
+ [2025-07-13 11:56:39,109][06939] Adding new argument 'hf_repository'='lokeessshhhh/rl_vizdoom_health_gathering' that is not in the saved config file!
+ [2025-07-13 11:56:39,110][06939] Adding new argument 'policy_index'=0 that is not in the saved config file!
+ [2025-07-13 11:56:39,114][06939] Adding new argument 'eval_deterministic'=False that is not in the saved config file!
+ [2025-07-13 11:56:39,115][06939] Adding new argument 'train_script'=None that is not in the saved config file!
+ [2025-07-13 11:56:39,115][06939] Adding new argument 'enjoy_script'=None that is not in the saved config file!
+ [2025-07-13 11:56:39,118][06939] Using frameskip 1 and render_action_repeat=4 for evaluation
+ [2025-07-13 11:56:39,161][06939] RunningMeanStd input shape: (3, 72, 128)
+ [2025-07-13 11:56:39,164][06939] RunningMeanStd input shape: (1,)
+ [2025-07-13 11:56:39,181][06939] ConvEncoder: input_channels=3
+ [2025-07-13 11:56:39,235][06939] Conv encoder output size: 512
+ [2025-07-13 11:56:39,236][06939] Policy head output size: 512
+ [2025-07-13 11:56:39,919][06939] Num frames 100...
+ [2025-07-13 11:56:40,107][06939] Num frames 200...
+ [2025-07-13 11:56:40,244][06939] Num frames 300...
+ [2025-07-13 11:56:40,408][06939] Avg episode rewards: #0: 3.840, true rewards: #0: 3.840
+ [2025-07-13 11:56:40,409][06939] Avg episode reward: 3.840, avg true_objective: 3.840
+ [2025-07-13 11:56:40,432][06939] Num frames 400...
+ [2025-07-13 11:56:40,557][06939] Num frames 500...
+ [2025-07-13 11:56:40,695][06939] Num frames 600...
+ [2025-07-13 11:56:40,823][06939] Num frames 700...
+ [2025-07-13 11:56:40,969][06939] Avg episode rewards: #0: 3.840, true rewards: #0: 3.840
+ [2025-07-13 11:56:40,970][06939] Avg episode reward: 3.840, avg true_objective: 3.840
+ [2025-07-13 11:56:41,012][06939] Num frames 800...
+ [2025-07-13 11:56:41,135][06939] Num frames 900...
+ [2025-07-13 11:56:41,266][06939] Num frames 1000...
+ [2025-07-13 11:56:41,352][06939] Avg episode rewards: #0: 3.413, true rewards: #0: 3.413
+ [2025-07-13 11:56:41,353][06939] Avg episode reward: 3.413, avg true_objective: 3.413
+ [2025-07-13 11:56:41,448][06939] Num frames 1100...
+ [2025-07-13 11:56:41,573][06939] Num frames 1200...
+ [2025-07-13 11:56:41,707][06939] Num frames 1300...
+ [2025-07-13 11:56:41,833][06939] Num frames 1400...
+ [2025-07-13 11:56:41,897][06939] Avg episode rewards: #0: 3.520, true rewards: #0: 3.520
+ [2025-07-13 11:56:41,899][06939] Avg episode reward: 3.520, avg true_objective: 3.520
+ [2025-07-13 11:56:42,019][06939] Num frames 1500...
+ [2025-07-13 11:56:42,146][06939] Num frames 1600...
+ [2025-07-13 11:56:50,594][06939] Replay video saved to ./train_dir/vizdoom_exp/replay.mp4!