Upload folder using huggingface_hub
- .gitattributes +1 -0
- .summary/0/events.out.tfevents.1752406344.7ee6365361bc +3 -0
- .summary/0/events.out.tfevents.1752406515.7ee6365361bc +3 -0
- README.md +56 -0
- checkpoint_p0/best_000000041_167936_reward_4.766.pth +3 -0
- checkpoint_p0/checkpoint_000000051_208896.pth +3 -0
- checkpoint_p0/checkpoint_000000053_217088.pth +3 -0
- config.json +143 -0
- replay.mp4 +3 -0
- sf_log.txt +695 -0
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+replay.mp4 filter=lfs diff=lfs merge=lfs -text
.summary/0/events.out.tfevents.1752406344.7ee6365361bc
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dc98f0422f716f0bebd12e0b5ff32b15f4cdc828223e68941422af2f924a08d1
+size 71424
.summary/0/events.out.tfevents.1752406515.7ee6365361bc
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cb5cf132b02e682e1d7a043080a6d13bcd473a9ba34ccca634d40f8168e1571d
+size 68936
README.md
ADDED
@@ -0,0 +1,56 @@
+---
+library_name: sample-factory
+tags:
+- deep-reinforcement-learning
+- reinforcement-learning
+- sample-factory
+model-index:
+- name: APPO
+  results:
+  - task:
+      type: reinforcement-learning
+      name: reinforcement-learning
+    dataset:
+      name: doom_health_gathering_supreme
+      type: doom_health_gathering_supreme
+    metrics:
+    - type: mean_reward
+      value: 3.52 +/- 0.55
+      name: mean_reward
+      verified: false
+---
+
+An **APPO** model trained on the **doom_health_gathering_supreme** environment.
+
+This model was trained using Sample-Factory 2.0: https://github.com/alex-petrenko/sample-factory.
+Documentation for how to use Sample-Factory can be found at https://www.samplefactory.dev/
+
+## Downloading the model
+
+After installing Sample-Factory, download the model with:
+```
+python -m sample_factory.huggingface.load_from_hub -r lokeessshhhh/rl_vizdoom_health_gathering
+```
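For convenience, the same files can also be fetched with the `huggingface_hub` Python API directly (a minimal sketch, not part of the uploaded card; the `local_dir` below is an illustrative choice assuming the `train_dir/<experiment>` layout the scripts in the next sections expect):

```python
from huggingface_hub import snapshot_download

# Download every file in this model repo (checkpoints, config.json,
# replay.mp4, logs) into the experiment directory Sample-Factory uses.
snapshot_download(
    repo_id="lokeessshhhh/rl_vizdoom_health_gathering",
    local_dir="./train_dir/rl_vizdoom_health_gathering",
)
```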
+
+## Using the model
+
+To run the model after download, use the `enjoy` script corresponding to this environment:
+```
+python -m <path.to.enjoy.module> --algo=APPO --env=doom_health_gathering_supreme --train_dir=./train_dir --experiment=rl_vizdoom_health_gathering
+```
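For the VizDoom examples that ship with Sample-Factory, the `<path.to.enjoy.module>` placeholder typically resolves to `sf_examples.vizdoom.enjoy_vizdoom` (an assumption based on the standard `sf_examples` layout, not stated in the card itself):

```
python -m sf_examples.vizdoom.enjoy_vizdoom --algo=APPO --env=doom_health_gathering_supreme --train_dir=./train_dir --experiment=rl_vizdoom_health_gathering
```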
+
+You can also upload models to the Hugging Face Hub using the same script with the `--push_to_hub` flag.
+See https://www.samplefactory.dev/10-huggingface/huggingface/ for more details.
+
+## Training with this model
+
+To continue training with this model, use the `train` script corresponding to this environment:
+```
+python -m <path.to.train.module> --algo=APPO --env=doom_health_gathering_supreme --train_dir=./train_dir --experiment=rl_vizdoom_health_gathering --restart_behavior=resume --train_for_env_steps=10000000000
+```
+
+Note: you may have to adjust `--train_for_env_steps` to a suitably high number, as the experiment will resume at the step count where it previously concluded.
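Under the same assumption as above, `<path.to.train.module>` would be `sf_examples.vizdoom.train_vizdoom` for this environment:

```
python -m sf_examples.vizdoom.train_vizdoom --algo=APPO --env=doom_health_gathering_supreme --train_dir=./train_dir --experiment=rl_vizdoom_health_gathering --restart_behavior=resume --train_for_env_steps=10000000000
```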
checkpoint_p0/best_000000041_167936_reward_4.766.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ee735185d5c0abf6417575a494c0d262bb25bd74e88ccf4502ed03c8f98c612f
+size 34929051
checkpoint_p0/checkpoint_000000051_208896.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:945044c2189c3dcb74e02f1fe84203a3a416e8ffd3c9fc3fedb9ce2ed37d7087
+size 34929477
checkpoint_p0/checkpoint_000000053_217088.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cf9cde09fbf8c2bfb97d48a33ad37407e78cd2b884828bd0954bf6f46f6fa5e5
+size 34929477
config.json
ADDED
@@ -0,0 +1,143 @@
+{
+    "help": false,
+    "algo": "APPO",
+    "env": "doom_health_gathering_supreme",
+    "experiment": "vizdoom_exp",
+    "train_dir": "./train_dir",
+    "restart_behavior": "resume",
+    "device": "gpu",
+    "seed": null,
+    "num_policies": 1,
+    "async_rl": true,
+    "serial_mode": false,
+    "batched_sampling": false,
+    "num_batches_to_accumulate": 2,
+    "worker_num_splits": 2,
+    "policy_workers_per_policy": 1,
+    "max_policy_lag": 1000,
+    "num_workers": 2,
+    "num_envs_per_worker": 2,
+    "batch_size": 1024,
+    "num_batches_per_epoch": 1,
+    "num_epochs": 1,
+    "rollout": 32,
+    "recurrence": 32,
+    "shuffle_minibatches": false,
+    "gamma": 0.99,
+    "reward_scale": 1.0,
+    "reward_clip": 1000.0,
+    "value_bootstrap": false,
+    "normalize_returns": true,
+    "exploration_loss_coeff": 0.001,
+    "value_loss_coeff": 0.5,
+    "kl_loss_coeff": 0.0,
+    "exploration_loss": "symmetric_kl",
+    "gae_lambda": 0.95,
+    "ppo_clip_ratio": 0.1,
+    "ppo_clip_value": 0.2,
+    "with_vtrace": false,
+    "vtrace_rho": 1.0,
+    "vtrace_c": 1.0,
+    "optimizer": "adam",
+    "adam_eps": 1e-06,
+    "adam_beta1": 0.9,
+    "adam_beta2": 0.999,
+    "max_grad_norm": 4.0,
+    "learning_rate": 0.0001,
+    "lr_schedule": "constant",
+    "lr_schedule_kl_threshold": 0.008,
+    "lr_adaptive_min": 1e-06,
+    "lr_adaptive_max": 0.01,
+    "obs_subtract_mean": 0.0,
+    "obs_scale": 255.0,
+    "normalize_input": true,
+    "normalize_input_keys": null,
+    "decorrelate_experience_max_seconds": 0,
+    "decorrelate_envs_on_one_worker": true,
+    "actor_worker_gpus": [],
+    "set_workers_cpu_affinity": true,
+    "force_envs_single_thread": false,
+    "default_niceness": 0,
+    "log_to_file": true,
+    "experiment_summaries_interval": 10,
+    "flush_summaries_interval": 30,
+    "stats_avg": 100,
+    "summaries_use_frameskip": true,
+    "heartbeat_interval": 20,
+    "heartbeat_reporting_interval": 600,
+    "train_for_env_steps": 200000,
+    "train_for_seconds": 10000000000,
+    "save_every_sec": 120,
+    "keep_checkpoints": 2,
+    "load_checkpoint_kind": "latest",
+    "save_milestones_sec": -1,
+    "save_best_every_sec": 5,
+    "save_best_metric": "reward",
+    "save_best_after": 100000,
+    "benchmark": false,
+    "encoder_mlp_layers": [
+        512,
+        512
+    ],
+    "encoder_conv_architecture": "convnet_simple",
+    "encoder_conv_mlp_layers": [
+        512
+    ],
+    "use_rnn": true,
+    "rnn_size": 512,
+    "rnn_type": "gru",
+    "rnn_num_layers": 1,
+    "decoder_mlp_layers": [],
+    "nonlinearity": "elu",
+    "policy_initialization": "orthogonal",
+    "policy_init_gain": 1.0,
+    "actor_critic_share_weights": true,
+    "adaptive_stddev": true,
+    "continuous_tanh_scale": 0.0,
+    "initial_stddev": 1.0,
+    "use_env_info_cache": false,
+    "env_gpu_actions": false,
+    "env_gpu_observations": true,
+    "env_frameskip": 4,
+    "env_framestack": 1,
+    "pixel_format": "CHW",
+    "use_record_episode_statistics": false,
+    "with_wandb": false,
+    "wandb_user": null,
+    "wandb_project": "sample_factory",
+    "wandb_group": null,
+    "wandb_job_type": "SF",
+    "wandb_tags": [],
+    "with_pbt": false,
+    "pbt_mix_policies_in_one_env": true,
+    "pbt_period_env_steps": 5000000,
+    "pbt_start_mutation": 20000000,
+    "pbt_replace_fraction": 0.3,
+    "pbt_mutation_rate": 0.15,
+    "pbt_replace_reward_gap": 0.1,
+    "pbt_replace_reward_gap_absolute": 1e-06,
+    "pbt_optimize_gamma": false,
+    "pbt_target_objective": "true_objective",
+    "pbt_perturb_min": 1.1,
+    "pbt_perturb_max": 1.5,
+    "num_agents": -1,
+    "num_humans": 0,
+    "num_bots": -1,
+    "start_bot_difficulty": null,
+    "timelimit": null,
+    "res_w": 128,
+    "res_h": 72,
+    "wide_aspect_ratio": false,
+    "eval_env_frameskip": 1,
+    "fps": 35,
+    "command_line": "--env=doom_health_gathering_supreme --experiment=vizdoom_exp --train_dir=./train_dir --num_workers=2 --num_envs_per_worker=2",
+    "cli_args": {
+        "env": "doom_health_gathering_supreme",
+        "experiment": "vizdoom_exp",
+        "train_dir": "./train_dir",
+        "num_workers": 2,
+        "num_envs_per_worker": 2
+    },
+    "git_hash": "unknown",
+    "git_repo_name": "not a git repository"
+}
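Since the file above is plain JSON, the run settings can be inspected directly; a minimal sketch (the values in the comments are taken from the file itself):

```python
import json

# Load the training configuration Sample-Factory saved for this run.
with open("config.json") as f:
    cfg = json.load(f)

print(cfg["algo"], cfg["env"])      # APPO doom_health_gathering_supreme
print(cfg["train_for_env_steps"])   # 200000
print(cfg["learning_rate"])         # 0.0001
print(cfg["rollout"], cfg["recurrence"], cfg["batch_size"])  # 32 32 1024
```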
replay.mp4
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c012dd4e71bbaffeb8d176b43f81991a69b62e99cb3e61c650b87c5c07cda2ba
+size 2306568
sf_log.txt
ADDED
@@ -0,0 +1,695 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[2025-07-13 11:32:27,039][02343] Saving configuration to ./train_dir/vizdoom_exp/config.json...
|
2 |
+
[2025-07-13 11:32:27,041][02343] Rollout worker 0 uses device cpu
|
3 |
+
[2025-07-13 11:32:27,042][02343] Rollout worker 1 uses device cpu
|
4 |
+
[2025-07-13 11:32:27,189][02343] Using GPUs [0] for process 0 (actually maps to GPUs [0])
|
5 |
+
[2025-07-13 11:32:27,190][02343] InferenceWorker_p0-w0: min num requests: 1
|
6 |
+
[2025-07-13 11:32:27,200][02343] Starting all processes...
|
7 |
+
[2025-07-13 11:32:27,201][02343] Starting process learner_proc0
|
8 |
+
[2025-07-13 11:32:27,262][02343] Starting all processes...
|
9 |
+
[2025-07-13 11:32:27,268][02343] Starting process inference_proc0-0
|
10 |
+
[2025-07-13 11:32:27,269][02343] Starting process rollout_proc0
|
11 |
+
[2025-07-13 11:32:27,269][02343] Starting process rollout_proc1
|
12 |
+
[2025-07-13 11:32:33,042][06118] Worker 1 uses CPU cores [1]
|
13 |
+
[2025-07-13 11:32:33,235][06119] Using GPUs [0] for process 0 (actually maps to GPUs [0])
|
14 |
+
[2025-07-13 11:32:33,235][06119] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for inference process 0
|
15 |
+
[2025-07-13 11:32:33,255][06119] Num visible devices: 1
|
16 |
+
[2025-07-13 11:32:33,310][06117] Worker 0 uses CPU cores [0]
|
17 |
+
[2025-07-13 11:32:33,348][06110] Using GPUs [0] for process 0 (actually maps to GPUs [0])
|
18 |
+
[2025-07-13 11:32:33,348][06110] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for learning process 0
|
19 |
+
[2025-07-13 11:32:33,365][06110] Num visible devices: 1
|
20 |
+
[2025-07-13 11:32:33,365][06110] Starting seed is not provided
|
21 |
+
[2025-07-13 11:32:33,366][06110] Using GPUs [0] for process 0 (actually maps to GPUs [0])
|
22 |
+
[2025-07-13 11:32:33,366][06110] Initializing actor-critic model on device cuda:0
|
23 |
+
[2025-07-13 11:32:33,366][06110] RunningMeanStd input shape: (3, 72, 128)
|
24 |
+
[2025-07-13 11:32:33,369][06110] RunningMeanStd input shape: (1,)
|
25 |
+
[2025-07-13 11:32:33,380][06110] ConvEncoder: input_channels=3
|
26 |
+
[2025-07-13 11:32:33,653][06110] Conv encoder output size: 512
|
27 |
+
[2025-07-13 11:32:33,653][06110] Policy head output size: 512
|
28 |
+
[2025-07-13 11:32:33,708][06110] Created Actor Critic model with architecture:
|
29 |
+
[2025-07-13 11:32:33,709][06110] ActorCriticSharedWeights(
|
30 |
+
(obs_normalizer): ObservationNormalizer(
|
31 |
+
(running_mean_std): RunningMeanStdDictInPlace(
|
32 |
+
(running_mean_std): ModuleDict(
|
33 |
+
(obs): RunningMeanStdInPlace()
|
34 |
+
)
|
35 |
+
)
|
36 |
+
)
|
37 |
+
(returns_normalizer): RecursiveScriptModule(original_name=RunningMeanStdInPlace)
|
38 |
+
(encoder): VizdoomEncoder(
|
39 |
+
(basic_encoder): ConvEncoder(
|
40 |
+
(enc): RecursiveScriptModule(
|
41 |
+
original_name=ConvEncoderImpl
|
42 |
+
(conv_head): RecursiveScriptModule(
|
43 |
+
original_name=Sequential
|
44 |
+
(0): RecursiveScriptModule(original_name=Conv2d)
|
45 |
+
(1): RecursiveScriptModule(original_name=ELU)
|
46 |
+
(2): RecursiveScriptModule(original_name=Conv2d)
|
47 |
+
(3): RecursiveScriptModule(original_name=ELU)
|
48 |
+
(4): RecursiveScriptModule(original_name=Conv2d)
|
49 |
+
(5): RecursiveScriptModule(original_name=ELU)
|
50 |
+
)
|
51 |
+
(mlp_layers): RecursiveScriptModule(
|
52 |
+
original_name=Sequential
|
53 |
+
(0): RecursiveScriptModule(original_name=Linear)
|
54 |
+
(1): RecursiveScriptModule(original_name=ELU)
|
55 |
+
)
|
56 |
+
)
|
57 |
+
)
|
58 |
+
)
|
59 |
+
(core): ModelCoreRNN(
|
60 |
+
(core): GRU(512, 512)
|
61 |
+
)
|
62 |
+
(decoder): MlpDecoder(
|
63 |
+
(mlp): Identity()
|
64 |
+
)
|
65 |
+
(critic_linear): Linear(in_features=512, out_features=1, bias=True)
|
66 |
+
(action_parameterization): ActionParameterizationDefault(
|
67 |
+
(distribution_linear): Linear(in_features=512, out_features=5, bias=True)
|
68 |
+
)
|
69 |
+
)
|
70 |
+
[2025-07-13 11:32:33,948][06110] Using optimizer <class 'torch.optim.adam.Adam'>
|
71 |
+
[2025-07-13 11:32:38,823][06110] No checkpoints found
|
72 |
+
[2025-07-13 11:32:38,823][06110] Did not load from checkpoint, starting from scratch!
|
73 |
+
[2025-07-13 11:32:38,823][06110] Initialized policy 0 weights for model version 0
|
74 |
+
[2025-07-13 11:32:38,826][06110] LearnerWorker_p0 finished initialization!
|
75 |
+
[2025-07-13 11:32:38,827][06110] Using GPUs [0] for process 0 (actually maps to GPUs [0])
|
76 |
+
[2025-07-13 11:32:38,965][06119] RunningMeanStd input shape: (3, 72, 128)
|
77 |
+
[2025-07-13 11:32:38,966][06119] RunningMeanStd input shape: (1,)
|
78 |
+
[2025-07-13 11:32:38,977][06119] ConvEncoder: input_channels=3
|
79 |
+
[2025-07-13 11:32:39,078][06119] Conv encoder output size: 512
|
80 |
+
[2025-07-13 11:32:39,079][06119] Policy head output size: 512
|
81 |
+
[2025-07-13 11:32:39,115][02343] Inference worker 0-0 is ready!
|
82 |
+
[2025-07-13 11:32:39,117][02343] All inference workers are ready! Signal rollout workers to start!
|
83 |
+
[2025-07-13 11:32:39,167][06118] Doom resolution: 160x120, resize resolution: (128, 72)
|
84 |
+
[2025-07-13 11:32:39,177][06117] Doom resolution: 160x120, resize resolution: (128, 72)
|
85 |
+
[2025-07-13 11:32:39,251][02343] Fps is (10 sec: nan, 60 sec: nan, 300 sec: nan). Total num frames: 0. Throughput: 0: nan. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
|
86 |
+
[2025-07-13 11:32:39,562][06118] Decorrelating experience for 0 frames...
|
87 |
+
[2025-07-13 11:32:39,581][06117] Decorrelating experience for 0 frames...
|
88 |
+
[2025-07-13 11:32:40,000][06118] Decorrelating experience for 32 frames...
|
89 |
+
[2025-07-13 11:32:40,013][06117] Decorrelating experience for 32 frames...
|
90 |
+
[2025-07-13 11:32:44,253][02343] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 138.2. Samples: 691. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
|
91 |
+
[2025-07-13 11:32:44,254][02343] Avg episode reward: [(0, '4.091')]
|
92 |
+
[2025-07-13 11:32:47,181][02343] Heartbeat connected on Batcher_0
|
93 |
+
[2025-07-13 11:32:47,185][02343] Heartbeat connected on LearnerWorker_p0
|
94 |
+
[2025-07-13 11:32:47,194][02343] Heartbeat connected on InferenceWorker_p0-w0
|
95 |
+
[2025-07-13 11:32:47,199][02343] Heartbeat connected on RolloutWorker_w0
|
96 |
+
[2025-07-13 11:32:47,202][02343] Heartbeat connected on RolloutWorker_w1
|
97 |
+
[2025-07-13 11:32:49,255][02343] Fps is (10 sec: 1228.3, 60 sec: 1228.3, 300 sec: 1228.3). Total num frames: 12288. Throughput: 0: 305.8. Samples: 3059. Policy #0 lag: (min: 0.0, avg: 0.1, max: 1.0)
|
98 |
+
[2025-07-13 11:32:49,257][02343] Avg episode reward: [(0, '4.554')]
|
99 |
+
[2025-07-13 11:32:54,251][02343] Fps is (10 sec: 2048.3, 60 sec: 1365.3, 300 sec: 1365.3). Total num frames: 20480. Throughput: 0: 395.1. Samples: 5927. Policy #0 lag: (min: 0.0, avg: 0.0, max: 1.0)
|
100 |
+
[2025-07-13 11:32:54,256][02343] Avg episode reward: [(0, '4.525')]
|
101 |
+
[2025-07-13 11:32:59,251][02343] Fps is (10 sec: 2048.8, 60 sec: 1638.4, 300 sec: 1638.4). Total num frames: 32768. Throughput: 0: 377.1. Samples: 7543. Policy #0 lag: (min: 0.0, avg: 0.0, max: 1.0)
|
102 |
+
[2025-07-13 11:32:59,255][02343] Avg episode reward: [(0, '4.484')]
|
103 |
+
[2025-07-13 11:33:02,707][06119] Updated weights for policy 0, policy_version 10 (0.0020)
|
104 |
+
[2025-07-13 11:33:04,252][02343] Fps is (10 sec: 2048.0, 60 sec: 1638.4, 300 sec: 1638.4). Total num frames: 40960. Throughput: 0: 418.6. Samples: 10465. Policy #0 lag: (min: 0.0, avg: 0.1, max: 1.0)
|
105 |
+
[2025-07-13 11:33:04,256][02343] Avg episode reward: [(0, '4.518')]
|
106 |
+
[2025-07-13 11:33:09,251][02343] Fps is (10 sec: 2048.0, 60 sec: 1774.9, 300 sec: 1774.9). Total num frames: 53248. Throughput: 0: 458.9. Samples: 13767. Policy #0 lag: (min: 0.0, avg: 0.1, max: 1.0)
|
107 |
+
[2025-07-13 11:33:09,255][02343] Avg episode reward: [(0, '4.456')]
|
108 |
+
[2025-07-13 11:33:14,251][02343] Fps is (10 sec: 2457.6, 60 sec: 1872.5, 300 sec: 1872.5). Total num frames: 65536. Throughput: 0: 442.7. Samples: 15493. Policy #0 lag: (min: 0.0, avg: 0.0, max: 1.0)
|
109 |
+
[2025-07-13 11:33:14,254][02343] Avg episode reward: [(0, '4.435')]
|
110 |
+
[2025-07-13 11:33:19,251][02343] Fps is (10 sec: 2048.0, 60 sec: 1843.2, 300 sec: 1843.2). Total num frames: 73728. Throughput: 0: 456.6. Samples: 18262. Policy #0 lag: (min: 0.0, avg: 0.0, max: 1.0)
|
111 |
+
[2025-07-13 11:33:19,255][02343] Avg episode reward: [(0, '4.358')]
|
112 |
+
[2025-07-13 11:33:22,039][06119] Updated weights for policy 0, policy_version 20 (0.0013)
|
113 |
+
[2025-07-13 11:33:24,251][02343] Fps is (10 sec: 2048.0, 60 sec: 1911.5, 300 sec: 1911.5). Total num frames: 86016. Throughput: 0: 482.6. Samples: 21718. Policy #0 lag: (min: 0.0, avg: 0.0, max: 1.0)
|
114 |
+
[2025-07-13 11:33:24,254][02343] Avg episode reward: [(0, '4.320')]
|
115 |
+
[2025-07-13 11:33:29,254][02343] Fps is (10 sec: 2047.9, 60 sec: 1884.1, 300 sec: 1884.1). Total num frames: 94208. Throughput: 0: 505.8. Samples: 23452. Policy #0 lag: (min: 0.0, avg: 0.0, max: 1.0)
|
116 |
+
[2025-07-13 11:33:29,255][02343] Avg episode reward: [(0, '4.360')]
|
117 |
+
[2025-07-13 11:33:34,251][02343] Fps is (10 sec: 2048.0, 60 sec: 1936.3, 300 sec: 1936.3). Total num frames: 106496. Throughput: 0: 513.3. Samples: 26154. Policy #0 lag: (min: 0.0, avg: 0.1, max: 1.0)
|
118 |
+
[2025-07-13 11:33:34,256][02343] Avg episode reward: [(0, '4.347')]
|
119 |
+
[2025-07-13 11:33:34,262][06110] Saving new best policy, reward=4.347!
|
120 |
+
[2025-07-13 11:33:39,251][02343] Fps is (10 sec: 2048.1, 60 sec: 1911.5, 300 sec: 1911.5). Total num frames: 114688. Throughput: 0: 525.0. Samples: 29551. Policy #0 lag: (min: 0.0, avg: 0.0, max: 1.0)
|
121 |
+
[2025-07-13 11:33:39,253][02343] Avg episode reward: [(0, '4.298')]
|
122 |
+
[2025-07-13 11:33:41,944][06119] Updated weights for policy 0, policy_version 30 (0.0014)
|
123 |
+
[2025-07-13 11:33:44,251][02343] Fps is (10 sec: 1638.4, 60 sec: 2048.0, 300 sec: 1890.5). Total num frames: 122880. Throughput: 0: 516.9. Samples: 30803. Policy #0 lag: (min: 0.0, avg: 0.1, max: 1.0)
|
124 |
+
[2025-07-13 11:33:44,255][02343] Avg episode reward: [(0, '4.267')]
|
125 |
+
[2025-07-13 11:33:49,251][02343] Fps is (10 sec: 2048.0, 60 sec: 2048.1, 300 sec: 1931.0). Total num frames: 135168. Throughput: 0: 522.6. Samples: 33984. Policy #0 lag: (min: 0.0, avg: 0.0, max: 1.0)
|
126 |
+
[2025-07-13 11:33:49,255][02343] Avg episode reward: [(0, '4.283')]
|
127 |
+
[2025-07-13 11:33:54,253][02343] Fps is (10 sec: 2457.2, 60 sec: 2116.2, 300 sec: 1966.0). Total num frames: 147456. Throughput: 0: 523.3. Samples: 37317. Policy #0 lag: (min: 0.0, avg: 0.0, max: 1.0)
|
128 |
+
[2025-07-13 11:33:54,254][02343] Avg episode reward: [(0, '4.434')]
|
129 |
+
[2025-07-13 11:33:54,261][06110] Saving new best policy, reward=4.434!
|
130 |
+
[2025-07-13 11:33:59,251][02343] Fps is (10 sec: 2048.0, 60 sec: 2048.0, 300 sec: 1945.6). Total num frames: 155648. Throughput: 0: 509.5. Samples: 38420. Policy #0 lag: (min: 0.0, avg: 0.0, max: 1.0)
|
131 |
+
[2025-07-13 11:33:59,258][02343] Avg episode reward: [(0, '4.585')]
|
132 |
+
[2025-07-13 11:33:59,264][06110] Saving new best policy, reward=4.585!
|
133 |
+
[2025-07-13 11:34:01,670][06119] Updated weights for policy 0, policy_version 40 (0.0013)
|
134 |
+
[2025-07-13 11:34:04,251][02343] Fps is (10 sec: 2048.3, 60 sec: 2116.3, 300 sec: 1975.7). Total num frames: 167936. Throughput: 0: 525.0. Samples: 41885. Policy #0 lag: (min: 0.0, avg: 0.1, max: 1.0)
|
135 |
+
[2025-07-13 11:34:04,255][02343] Avg episode reward: [(0, '4.766')]
|
136 |
+
[2025-07-13 11:34:04,263][06110] Saving new best policy, reward=4.766!
|
137 |
+
[2025-07-13 11:34:09,259][02343] Fps is (10 sec: 2046.4, 60 sec: 2047.7, 300 sec: 1956.8). Total num frames: 176128. Throughput: 0: 510.4. Samples: 44692. Policy #0 lag: (min: 0.0, avg: 0.0, max: 1.0)
|
138 |
+
[2025-07-13 11:34:09,261][02343] Avg episode reward: [(0, '4.719')]
|
139 |
+
[2025-07-13 11:34:14,251][02343] Fps is (10 sec: 2048.0, 60 sec: 2048.0, 300 sec: 1983.3). Total num frames: 188416. Throughput: 0: 507.6. Samples: 46295. Policy #0 lag: (min: 0.0, avg: 0.1, max: 1.0)
|
140 |
+
[2025-07-13 11:34:14,255][02343] Avg episode reward: [(0, '4.561')]
|
141 |
+
[2025-07-13 11:34:19,251][02343] Fps is (10 sec: 2459.5, 60 sec: 2116.3, 300 sec: 2007.0). Total num frames: 200704. Throughput: 0: 525.2. Samples: 49790. Policy #0 lag: (min: 0.0, avg: 0.0, max: 1.0)
|
142 |
+
[2025-07-13 11:34:19,255][02343] Avg episode reward: [(0, '4.330')]
|
143 |
+
[2025-07-13 11:34:21,466][06119] Updated weights for policy 0, policy_version 50 (0.0017)
|
144 |
+
[2025-07-13 11:34:24,257][02343] Fps is (10 sec: 2046.8, 60 sec: 2047.8, 300 sec: 1989.4). Total num frames: 208896. Throughput: 0: 509.2. Samples: 52469. Policy #0 lag: (min: 0.0, avg: 0.1, max: 1.0)
|
145 |
+
[2025-07-13 11:34:24,262][02343] Avg episode reward: [(0, '4.333')]
|
146 |
+
[2025-07-13 11:34:24,270][06110] Saving ./train_dir/vizdoom_exp/checkpoint_p0/checkpoint_000000051_208896.pth...
|
147 |
+
[2025-07-13 11:34:28,853][02343] Keyboard interrupt detected in the event loop EvtLoop [Runner_EvtLoop, process=main process 2343], exiting...
|
148 |
+
[2025-07-13 11:34:28,857][06110] Stopping Batcher_0...
|
149 |
+
[2025-07-13 11:34:28,857][06110] Loop batcher_evt_loop terminating...
|
150 |
+
[2025-07-13 11:34:28,859][06110] Saving ./train_dir/vizdoom_exp/checkpoint_p0/checkpoint_000000053_217088.pth...
|
151 |
+
[2025-07-13 11:34:28,899][06118] EvtLoop [rollout_proc1_evt_loop, process=rollout_proc1] unhandled exception in slot='advance_rollouts' connected to emitter=Emitter(object_id='InferenceWorker_p0-w0', signal_name='advance1'), args=(1, 0)
|
152 |
+
Traceback (most recent call last):
|
153 |
+
File "/usr/local/lib/python3.11/dist-packages/signal_slot/signal_slot.py", line 355, in _process_signal
|
154 |
+
slot_callable(*args)
|
155 |
+
File "/usr/local/lib/python3.11/dist-packages/sample_factory/algo/sampling/rollout_worker.py", line 241, in advance_rollouts
|
156 |
+
complete_rollouts, episodic_stats = runner.advance_rollouts(policy_id, self.timing)
|
157 |
+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
158 |
+
File "/usr/local/lib/python3.11/dist-packages/sample_factory/algo/sampling/non_batched_sampling.py", line 634, in advance_rollouts
|
159 |
+
new_obs, rewards, terminated, truncated, infos = e.step(actions)
|
160 |
+
^^^^^^^^^^^^^^^
|
161 |
+
File "/usr/local/lib/python3.11/dist-packages/gymnasium/core.py", line 461, in step
|
162 |
+
return self.env.step(action)
|
163 |
+
^^^^^^^^^^^^^^^^^^^^^
|
164 |
+
File "/usr/local/lib/python3.11/dist-packages/sample_factory/algo/utils/make_env.py", line 129, in step
|
165 |
+
obs, rew, terminated, truncated, info = self.env.step(action)
|
166 |
+
^^^^^^^^^^^^^^^^^^^^^
|
167 |
+
File "/usr/local/lib/python3.11/dist-packages/sample_factory/algo/utils/make_env.py", line 115, in step
|
168 |
+
obs, rew, terminated, truncated, info = self.env.step(action)
|
169 |
+
^^^^^^^^^^^^^^^^^^^^^
|
170 |
+
File "/usr/local/lib/python3.11/dist-packages/sf_examples/vizdoom/doom/wrappers/scenario_wrappers/gathering_reward_shaping.py", line 33, in step
|
171 |
+
observation, reward, terminated, truncated, info = self.env.step(action)
|
172 |
+
^^^^^^^^^^^^^^^^^^^^^
|
173 |
+
File "/usr/local/lib/python3.11/dist-packages/gymnasium/core.py", line 522, in step
|
174 |
+
observation, reward, terminated, truncated, info = self.env.step(action)
|
175 |
+
^^^^^^^^^^^^^^^^^^^^^
|
176 |
+
File "/usr/local/lib/python3.11/dist-packages/sample_factory/envs/env_wrappers.py", line 86, in step
|
177 |
+
obs, reward, terminated, truncated, info = self.env.step(action)
|
178 |
+
^^^^^^^^^^^^^^^^^^^^^
|
179 |
+
File "/usr/local/lib/python3.11/dist-packages/gymnasium/core.py", line 461, in step
|
180 |
+
return self.env.step(action)
|
181 |
+
^^^^^^^^^^^^^^^^^^^^^
|
182 |
+
File "/usr/local/lib/python3.11/dist-packages/sf_examples/vizdoom/doom/wrappers/multiplayer_stats.py", line 54, in step
|
183 |
+
obs, reward, terminated, truncated, info = self.env.step(action)
|
184 |
+
^^^^^^^^^^^^^^^^^^^^^
|
185 |
+
File "/usr/local/lib/python3.11/dist-packages/sf_examples/vizdoom/doom/doom_gym.py", line 452, in step
|
186 |
+
reward = self.game.make_action(actions_flattened, self.skip_frames)
|
187 |
+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
188 |
+
vizdoom.vizdoom.SignalException: Signal SIGINT received. ViZDoom instance has been closed.
|
189 |
+
[2025-07-13 11:34:28,912][06118] Unhandled exception Signal SIGINT received. ViZDoom instance has been closed. in evt loop rollout_proc1_evt_loop
|
190 |
+
[2025-07-13 11:34:28,945][06119] Weights refcount: 2 0
|
191 |
+
[2025-07-13 11:34:28,949][06119] Stopping InferenceWorker_p0-w0...
|
192 |
+
[2025-07-13 11:34:28,949][06119] Loop inference_proc0-0_evt_loop terminating...
|
193 |
+
[2025-07-13 11:34:29,010][06110] Stopping LearnerWorker_p0...
|
194 |
+
[2025-07-13 11:34:29,011][06110] Loop learner_proc0_evt_loop terminating...
|
195 |
+
[2025-07-13 11:35:24,325][06939] Saving configuration to ./train_dir/vizdoom_exp/config.json...
|
196 |
+
[2025-07-13 11:35:24,327][06939] Rollout worker 0 uses device cpu
|
197 |
+
[2025-07-13 11:35:24,328][06939] Rollout worker 1 uses device cpu
|
198 |
+
[2025-07-13 11:35:24,454][06939] Using GPUs [0] for process 0 (actually maps to GPUs [0])
|
199 |
+
[2025-07-13 11:35:24,455][06939] InferenceWorker_p0-w0: min num requests: 1
|
200 |
+
[2025-07-13 11:35:24,467][06939] Starting all processes...
|
201 |
+
[2025-07-13 11:35:24,468][06939] Starting process learner_proc0
|
202 |
+
[2025-07-13 11:35:24,769][06939] Starting all processes...
|
203 |
+
[2025-07-13 11:35:24,783][06939] Starting process inference_proc0-0
|
204 |
+
[2025-07-13 11:35:24,784][06939] Starting process rollout_proc0
|
205 |
+
[2025-07-13 11:35:24,784][06939] Starting process rollout_proc1
|
206 |
+
[2025-07-13 11:35:30,789][07359] Using GPUs [0] for process 0 (actually maps to GPUs [0])
|
207 |
+
[2025-07-13 11:35:30,794][07359] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for learning process 0
|
208 |
+
[2025-07-13 11:35:30,857][07359] Num visible devices: 1
|
209 |
+
[2025-07-13 11:35:30,861][07359] Starting seed is not provided
|
210 |
+
[2025-07-13 11:35:30,861][07359] Using GPUs [0] for process 0 (actually maps to GPUs [0])
|
211 |
+
[2025-07-13 11:35:30,862][07359] Initializing actor-critic model on device cuda:0
|
212 |
+
[2025-07-13 11:35:30,863][07359] RunningMeanStd input shape: (3, 72, 128)
|
213 |
+
[2025-07-13 11:35:30,866][07359] RunningMeanStd input shape: (1,)
|
214 |
+
[2025-07-13 11:35:30,937][07359] ConvEncoder: input_channels=3
|
215 |
+
[2025-07-13 11:35:31,320][07368] Worker 1 uses CPU cores [1]
|
216 |
+
[2025-07-13 11:35:31,517][07359] Conv encoder output size: 512
|
217 |
+
[2025-07-13 11:35:31,517][07359] Policy head output size: 512
|
218 |
+
[2025-07-13 11:35:31,549][07359] Created Actor Critic model with architecture:
|
219 |
+
[2025-07-13 11:35:31,550][07359] ActorCriticSharedWeights(
|
220 |
+
(obs_normalizer): ObservationNormalizer(
|
221 |
+
(running_mean_std): RunningMeanStdDictInPlace(
|
222 |
+
(running_mean_std): ModuleDict(
|
223 |
+
(obs): RunningMeanStdInPlace()
|
224 |
+
)
|
225 |
+
)
|
226 |
+
)
|
227 |
+
(returns_normalizer): RecursiveScriptModule(original_name=RunningMeanStdInPlace)
|
228 |
+
(encoder): VizdoomEncoder(
|
229 |
+
(basic_encoder): ConvEncoder(
|
230 |
+
(enc): RecursiveScriptModule(
|
231 |
+
original_name=ConvEncoderImpl
|
232 |
+
(conv_head): RecursiveScriptModule(
|
233 |
+
original_name=Sequential
|
234 |
+
(0): RecursiveScriptModule(original_name=Conv2d)
|
235 |
+
(1): RecursiveScriptModule(original_name=ELU)
|
236 |
+
(2): RecursiveScriptModule(original_name=Conv2d)
|
237 |
+
(3): RecursiveScriptModule(original_name=ELU)
|
238 |
+
(4): RecursiveScriptModule(original_name=Conv2d)
|
239 |
+
(5): RecursiveScriptModule(original_name=ELU)
|
240 |
+
)
|
241 |
+
(mlp_layers): RecursiveScriptModule(
|
242 |
+
original_name=Sequential
|
243 |
+
(0): RecursiveScriptModule(original_name=Linear)
|
244 |
+
(1): RecursiveScriptModule(original_name=ELU)
|
245 |
+
)
|
246 |
+
)
|
247 |
+
)
|
248 |
+
)
|
249 |
+
(core): ModelCoreRNN(
|
250 |
+
(core): GRU(512, 512)
|
251 |
+
)
|
252 |
+
(decoder): MlpDecoder(
|
253 |
+
(mlp): Identity()
|
254 |
+
)
|
255 |
+
(critic_linear): Linear(in_features=512, out_features=1, bias=True)
|
256 |
+
(action_parameterization): ActionParameterizationDefault(
|
257 |
+
(distribution_linear): Linear(in_features=512, out_features=5, bias=True)
|
258 |
+
)
|
259 |
+
)
|
260 |
+
[2025-07-13 11:35:31,593][07367] Using GPUs [0] for process 0 (actually maps to GPUs [0])
|
261 |
+
[2025-07-13 11:35:31,594][07367] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for inference process 0
|
262 |
+
[2025-07-13 11:35:31,634][07367] Num visible devices: 1
|
263 |
+
[2025-07-13 11:35:31,636][07369] Worker 0 uses CPU cores [0]
|
264 |
+
[2025-07-13 11:35:31,742][07359] Using optimizer <class 'torch.optim.adam.Adam'>
|
265 |
+
[2025-07-13 11:35:33,126][07359] Loading state from checkpoint ./train_dir/vizdoom_exp/checkpoint_p0/checkpoint_000000053_217088.pth...
|
266 |
+
[2025-07-13 11:35:33,127][07359] Could not load from checkpoint, attempt 0
|
267 |
+
Traceback (most recent call last):
|
268 |
+
File "/usr/local/lib/python3.11/dist-packages/sample_factory/algo/learning/learner.py", line 281, in load_checkpoint
|
269 |
+
checkpoint_dict = torch.load(latest_checkpoint, map_location=device)
|
270 |
+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
271 |
+
File "/usr/local/lib/python3.11/dist-packages/torch/serialization.py", line 1470, in load
|
272 |
+
raise pickle.UnpicklingError(_get_wo_message(str(e))) from None
|
273 |
+
_pickle.UnpicklingError: Weights only load failed. This file can still be loaded, to do so you have two options, [1mdo those steps only if you trust the source of the checkpoint[0m.
|
274 |
+
(1) In PyTorch 2.6, we changed the default value of the `weights_only` argument in `torch.load` from `False` to `True`. Re-running `torch.load` with `weights_only` set to `False` will likely succeed, but it can result in arbitrary code execution. Do it only if you got the file from a trusted source.
|
275 |
+
(2) Alternatively, to load with `weights_only=True` please check the recommended steps in the following error message.
|
276 |
+
WeightsUnpickler error: Unsupported global: GLOBAL numpy.core.multiarray.scalar was not an allowed global by default. Please use `torch.serialization.add_safe_globals([scalar])` or the `torch.serialization.safe_globals([scalar])` context manager to allowlist this global if you trust this class/function.
|
277 |
+
|
278 |
+
Check the documentation of torch.load to learn more about types accepted by default with weights_only https://pytorch.org/docs/stable/generated/torch.load.html.
|
279 |
+
[2025-07-13 11:35:33,130][07359] Loading state from checkpoint ./train_dir/vizdoom_exp/checkpoint_p0/checkpoint_000000053_217088.pth...
|
280 |
+
[2025-07-13 11:35:33,131][07359] Could not load from checkpoint, attempt 1
|
281 |
+
Traceback (most recent call last):
|
282 |
+
File "/usr/local/lib/python3.11/dist-packages/sample_factory/algo/learning/learner.py", line 281, in load_checkpoint
|
283 |
+
checkpoint_dict = torch.load(latest_checkpoint, map_location=device)
|
284 |
+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
285 |
+
File "/usr/local/lib/python3.11/dist-packages/torch/serialization.py", line 1470, in load
|
286 |
+
raise pickle.UnpicklingError(_get_wo_message(str(e))) from None
|
287 |
+
_pickle.UnpicklingError: Weights only load failed. This file can still be loaded, to do so you have two options, [1mdo those steps only if you trust the source of the checkpoint[0m.
|
288 |
+
(1) In PyTorch 2.6, we changed the default value of the `weights_only` argument in `torch.load` from `False` to `True`. Re-running `torch.load` with `weights_only` set to `False` will likely succeed, but it can result in arbitrary code execution. Do it only if you got the file from a trusted source.
|
289 |
+
(2) Alternatively, to load with `weights_only=True` please check the recommended steps in the following error message.
|
290 |
+
WeightsUnpickler error: Unsupported global: GLOBAL numpy.core.multiarray.scalar was not an allowed global by default. Please use `torch.serialization.add_safe_globals([scalar])` or the `torch.serialization.safe_globals([scalar])` context manager to allowlist this global if you trust this class/function.
|
291 |
+
|
292 |
+
Check the documentation of torch.load to learn more about types accepted by default with weights_only https://pytorch.org/docs/stable/generated/torch.load.html.
|
293 |
+
[2025-07-13 11:35:33,132][07359] Loading state from checkpoint ./train_dir/vizdoom_exp/checkpoint_p0/checkpoint_000000053_217088.pth...
|
294 |
+
[2025-07-13 11:35:33,132][07359] Could not load from checkpoint, attempt 2
|
295 |
+
Traceback (most recent call last):
|
296 |
+
File "/usr/local/lib/python3.11/dist-packages/sample_factory/algo/learning/learner.py", line 281, in load_checkpoint
|
297 |
+
checkpoint_dict = torch.load(latest_checkpoint, map_location=device)
|
298 |
+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
299 |
+
File "/usr/local/lib/python3.11/dist-packages/torch/serialization.py", line 1470, in load
|
300 |
+
raise pickle.UnpicklingError(_get_wo_message(str(e))) from None
|
301 |
+
_pickle.UnpicklingError: Weights only load failed. This file can still be loaded, to do so you have two options, [1mdo those steps only if you trust the source of the checkpoint[0m.
|
302 |
+
(1) In PyTorch 2.6, we changed the default value of the `weights_only` argument in `torch.load` from `False` to `True`. Re-running `torch.load` with `weights_only` set to `False` will likely succeed, but it can result in arbitrary code execution. Do it only if you got the file from a trusted source.
|
303 |
+
(2) Alternatively, to load with `weights_only=True` please check the recommended steps in the following error message.
|
304 |
+
WeightsUnpickler error: Unsupported global: GLOBAL numpy.core.multiarray.scalar was not an allowed global by default. Please use `torch.serialization.add_safe_globals([scalar])` or the `torch.serialization.safe_globals([scalar])` context manager to allowlist this global if you trust this class/function.
|
305 |
+
|
306 |
+
Check the documentation of torch.load to learn more about types accepted by default with weights_only https://pytorch.org/docs/stable/generated/torch.load.html.
|
307 |
+
[2025-07-13 11:35:33,133][07359] Did not load from checkpoint, starting from scratch!
|
308 |
+
[2025-07-13 11:35:33,133][07359] Initialized policy 0 weights for model version 0
|
309 |
+
[2025-07-13 11:35:33,143][07359] LearnerWorker_p0 finished initialization!
|
310 |
+
[2025-07-13 11:35:33,143][07359] Using GPUs [0] for process 0 (actually maps to GPUs [0])
|
311 |
+
[2025-07-13 11:35:33,241][07367] RunningMeanStd input shape: (3, 72, 128)
|
312 |
+
[2025-07-13 11:35:33,243][07367] RunningMeanStd input shape: (1,)
|
313 |
+
[2025-07-13 11:35:33,253][07367] ConvEncoder: input_channels=3
|
314 |
+
[2025-07-13 11:35:33,370][07367] Conv encoder output size: 512
|
315 |
+
[2025-07-13 11:35:33,370][07367] Policy head output size: 512
|
316 |
+
[2025-07-13 11:35:33,403][06939] Inference worker 0-0 is ready!
|
317 |
+
[2025-07-13 11:35:33,404][06939] All inference workers are ready! Signal rollout workers to start!
|
318 |
+
[2025-07-13 11:35:33,457][07368] Doom resolution: 160x120, resize resolution: (128, 72)
|
319 |
+
[2025-07-13 11:35:33,456][07369] Doom resolution: 160x120, resize resolution: (128, 72)
|
320 |
+
[2025-07-13 11:35:33,844][07369] Decorrelating experience for 0 frames...
|
321 |
+
[2025-07-13 11:35:33,851][07368] Decorrelating experience for 0 frames...
|
322 |
+
[2025-07-13 11:35:34,276][07369] Decorrelating experience for 32 frames...
|
323 |
+
[2025-07-13 11:35:34,286][07368] Decorrelating experience for 32 frames...
|
324 |
+
[2025-07-13 11:35:35,802][06939] Fps is (10 sec: nan, 60 sec: nan, 300 sec: nan). Total num frames: 0. Throughput: 0: nan. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
|
325 |
+
[2025-07-13 11:35:35,811][06939] Avg episode reward: [(0, '3.216')]
|
326 |
+
[2025-07-13 11:35:40,802][06939] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 273.6. Samples: 1368. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
|
327 |
+
[2025-07-13 11:35:40,807][06939] Avg episode reward: [(0, '4.118')]
|
328 |
+
[2025-07-13 11:35:44,442][06939] Heartbeat connected on Batcher_0
|
329 |
+
[2025-07-13 11:35:44,446][06939] Heartbeat connected on LearnerWorker_p0
|
330 |
+
[2025-07-13 11:35:44,456][06939] Heartbeat connected on InferenceWorker_p0-w0
|
331 |
+
[2025-07-13 11:35:44,462][06939] Heartbeat connected on RolloutWorker_w0
|
332 |
+
[2025-07-13 11:35:44,473][06939] Heartbeat connected on RolloutWorker_w1
|
333 |
+
[2025-07-13 11:35:45,802][06939] Fps is (10 sec: 1638.4, 60 sec: 1638.4, 300 sec: 1638.4). Total num frames: 16384. Throughput: 0: 425.2. Samples: 4252. Policy #0 lag: (min: 0.0, avg: 0.1, max: 1.0)
|
334 |
+
[2025-07-13 11:35:45,803][06939] Avg episode reward: [(0, '4.094')]
|
335 |
+
[2025-07-13 11:35:50,802][06939] Fps is (10 sec: 2457.7, 60 sec: 1638.4, 300 sec: 1638.4). Total num frames: 24576. Throughput: 0: 400.2. Samples: 6003. Policy #0 lag: (min: 0.0, avg: 0.1, max: 1.0)
|
336 |
+
[2025-07-13 11:35:50,803][06939] Avg episode reward: [(0, '4.254')]
|
337 |
+
[2025-07-13 11:35:55,802][06939] Fps is (10 sec: 2048.0, 60 sec: 1843.2, 300 sec: 1843.2). Total num frames: 36864. Throughput: 0: 435.6. Samples: 8712. Policy #0 lag: (min: 0.0, avg: 0.1, max: 1.0)
|
338 |
+
[2025-07-13 11:35:55,806][06939] Avg episode reward: [(0, '4.370')]
|
339 |
+
[2025-07-13 11:35:57,194][07367] Updated weights for policy 0, policy_version 10 (0.0023)
|
340 |
+
[2025-07-13 11:36:00,802][06939] Fps is (10 sec: 2457.6, 60 sec: 1966.1, 300 sec: 1966.1). Total num frames: 49152. Throughput: 0: 488.8. Samples: 12220. Policy #0 lag: (min: 0.0, avg: 0.1, max: 1.0)
|
341 |
+
[2025-07-13 11:36:00,805][06939] Avg episode reward: [(0, '4.427')]
|
342 |
+
[2025-07-13 11:36:05,802][06939] Fps is (10 sec: 2048.0, 60 sec: 1911.5, 300 sec: 1911.5). Total num frames: 57344. Throughput: 0: 451.5. Samples: 13545. Policy #0 lag: (min: 0.0, avg: 0.1, max: 1.0)
|
343 |
+
[2025-07-13 11:36:05,803][06939] Avg episode reward: [(0, '4.483')]
|
344 |
+
[2025-07-13 11:36:10,802][06939] Fps is (10 sec: 1638.4, 60 sec: 1872.5, 300 sec: 1872.5). Total num frames: 65536. Throughput: 0: 475.7. Samples: 16650. Policy #0 lag: (min: 0.0, avg: 0.0, max: 1.0)
|
345 |
+
[2025-07-13 11:36:10,806][06939] Avg episode reward: [(0, '4.508')]
|
346 |
+
[2025-07-13 11:36:15,804][06939] Fps is (10 sec: 2047.6, 60 sec: 1945.5, 300 sec: 1945.5). Total num frames: 77824. Throughput: 0: 490.3. Samples: 19613. Policy #0 lag: (min: 0.0, avg: 0.0, max: 1.0)
|
347 |
+
[2025-07-13 11:36:15,807][06939] Avg episode reward: [(0, '4.467')]
|
348 |
+
[2025-07-13 11:36:18,157][07367] Updated weights for policy 0, policy_version 20 (0.0020)
|
349 |
+
[2025-07-13 11:36:20,802][06939] Fps is (10 sec: 2048.0, 60 sec: 1911.5, 300 sec: 1911.5). Total num frames: 86016. Throughput: 0: 459.3. Samples: 20670. Policy #0 lag: (min: 0.0, avg: 0.1, max: 1.0)
|
350 |
+
[2025-07-13 11:36:20,805][06939] Avg episode reward: [(0, '4.500')]
|
351 |
+
[2025-07-13 11:36:25,802][06939] Fps is (10 sec: 2048.5, 60 sec: 1966.1, 300 sec: 1966.1). Total num frames: 98304. Throughput: 0: 505.8. Samples: 24129. Policy #0 lag: (min: 0.0, avg: 0.1, max: 1.0)
|
352 |
+
[2025-07-13 11:36:25,805][06939] Avg episode reward: [(0, '4.592')]
|
353 |
+
[2025-07-13 11:36:30,806][06939] Fps is (10 sec: 2047.1, 60 sec: 1936.1, 300 sec: 1936.1). Total num frames: 106496. Throughput: 0: 505.5. Samples: 27002. Policy #0 lag: (min: 0.0, avg: 0.0, max: 1.0)
|
354 |
+
[2025-07-13 11:36:30,810][06939] Avg episode reward: [(0, '4.454')]
|
355 |
+
[2025-07-13 11:36:30,813][07359] Saving new best policy, reward=4.454!
|
356 |
+
[2025-07-13 11:36:35,802][06939] Fps is (10 sec: 2048.0, 60 sec: 1979.7, 300 sec: 1979.7). Total num frames: 118784. Throughput: 0: 499.9. Samples: 28498. Policy #0 lag: (min: 0.0, avg: 0.1, max: 1.0)
|
357 |
+
[2025-07-13 11:36:35,803][06939] Avg episode reward: [(0, '4.456')]
|
358 |
+
[2025-07-13 11:36:35,814][07359] Saving new best policy, reward=4.456!
|
359 |
+
[2025-07-13 11:36:37,443][07367] Updated weights for policy 0, policy_version 30 (0.0014)
|
360 |
+
[2025-07-13 11:36:40,802][06939] Fps is (10 sec: 2048.9, 60 sec: 2116.3, 300 sec: 1953.5). Total num frames: 126976. Throughput: 0: 515.4. Samples: 31905. Policy #0 lag: (min: 0.0, avg: 0.0, max: 1.0)
|
361 |
+
[2025-07-13 11:36:40,809][06939] Avg episode reward: [(0, '4.306')]
|
362 |
+
[2025-07-13 11:36:45,802][06939] Fps is (10 sec: 2048.0, 60 sec: 2048.0, 300 sec: 1989.5). Total num frames: 139264. Throughput: 0: 498.6. Samples: 34659. Policy #0 lag: (min: 0.0, avg: 0.1, max: 1.0)
|
363 |
+
[2025-07-13 11:36:45,805][06939] Avg episode reward: [(0, '4.352')]
|
364 |
+
[2025-07-13 11:36:50,803][06939] Fps is (10 sec: 2047.7, 60 sec: 2047.9, 300 sec: 1966.0). Total num frames: 147456. Throughput: 0: 507.2. Samples: 36371. Policy #0 lag: (min: 0.0, avg: 0.0, max: 1.0)
|
365 |
+
[2025-07-13 11:36:50,808][06939] Avg episode reward: [(0, '4.362')]
|
366 |
+
[2025-07-13 11:36:55,802][06939] Fps is (10 sec: 1638.3, 60 sec: 1979.7, 300 sec: 1945.6). Total num frames: 155648. Throughput: 0: 507.1. Samples: 39470. Policy #0 lag: (min: 0.0, avg: 0.1, max: 1.0)
|
367 |
+
[2025-07-13 11:36:55,810][06939] Avg episode reward: [(0, '4.446')]
|
368 |
+
[2025-07-13 11:36:58,610][07367] Updated weights for policy 0, policy_version 40 (0.0018)
|
369 |
+
[2025-07-13 11:37:00,802][06939] Fps is (10 sec: 2048.4, 60 sec: 1979.7, 300 sec: 1975.7). Total num frames: 167936. Throughput: 0: 498.2. Samples: 42031. Policy #0 lag: (min: 0.0, avg: 0.1, max: 1.0)
|
370 |
+
[2025-07-13 11:37:00,804][06939] Avg episode reward: [(0, '4.559')]
|
371 |
+
[2025-07-13 11:37:00,808][07359] Saving new best policy, reward=4.559!
|
372 |
+
[2025-07-13 11:37:05,805][06939] Fps is (10 sec: 2047.5, 60 sec: 1979.6, 300 sec: 1956.9). Total num frames: 176128. Throughput: 0: 511.3. Samples: 43682. Policy #0 lag: (min: 0.0, avg: 0.1, max: 1.0)
|
373 |
+
[2025-07-13 11:37:05,809][06939] Avg episode reward: [(0, '4.462')]
|
374 |
+
[2025-07-13 11:37:10,802][06939] Fps is (10 sec: 1638.4, 60 sec: 1979.7, 300 sec: 1940.2). Total num frames: 184320. Throughput: 0: 498.0. Samples: 46541. Policy #0 lag: (min: 0.0, avg: 0.0, max: 1.0)
|
375 |
+
[2025-07-13 11:37:10,806][06939] Avg episode reward: [(0, '4.346')]
|
376 |
+
[2025-07-13 11:37:15,802][06939] Fps is (10 sec: 2048.6, 60 sec: 1979.8, 300 sec: 1966.1). Total num frames: 196608. Throughput: 0: 504.4. Samples: 49699. Policy #0 lag: (min: 0.0, avg: 0.0, max: 1.0)
|
377 |
+
[2025-07-13 11:37:15,805][06939] Avg episode reward: [(0, '4.272')]
|
378 |
+
[2025-07-13 11:37:15,814][07359] Saving ./train_dir/vizdoom_exp/checkpoint_p0/checkpoint_000000048_196608.pth...
|
379 |
+
[2025-07-13 11:37:15,891][07359] Removing ./train_dir/vizdoom_exp/checkpoint_p0/checkpoint_000000048_196608.pth
|
380 |
+
[2025-07-13 11:37:18,217][07359] Saving ./train_dir/vizdoom_exp/checkpoint_p0/checkpoint_000000050_204800.pth...
|
381 |
+
[2025-07-13 11:37:18,218][07359] Stopping Batcher_0...
|
382 |
+
[2025-07-13 11:37:18,225][07359] Loop batcher_evt_loop terminating...
|
383 |
+
[2025-07-13 11:37:18,223][06939] Component Batcher_0 stopped!
|
384 |
+
[2025-07-13 11:37:18,232][07367] Updated weights for policy 0, policy_version 50 (0.0014)
|
385 |
+
[2025-07-13 11:37:18,266][07367] Weights refcount: 2 0
|
386 |
+
[2025-07-13 11:37:18,268][06939] Component InferenceWorker_p0-w0 stopped!
|
387 |
+
[2025-07-13 11:37:18,271][07367] Stopping InferenceWorker_p0-w0...
|
388 |
+
[2025-07-13 11:37:18,273][07367] Loop inference_proc0-0_evt_loop terminating...
|
389 |
+
[2025-07-13 11:37:18,298][07359] Removing ./train_dir/vizdoom_exp/checkpoint_p0/checkpoint_000000050_204800.pth
|
390 |
+
[2025-07-13 11:37:18,307][07359] Saving ./train_dir/vizdoom_exp/checkpoint_p0/checkpoint_000000050_204800.pth...
|
391 |
+
[2025-07-13 11:37:18,368][06939] Component RolloutWorker_w1 stopped!
|
392 |
+
[2025-07-13 11:37:18,371][07368] Stopping RolloutWorker_w1...
|
393 |
+
[2025-07-13 11:37:18,374][07368] Loop rollout_proc1_evt_loop terminating...
|
394 |
+
[2025-07-13 11:37:18,387][07359] Removing ./train_dir/vizdoom_exp/checkpoint_p0/checkpoint_000000050_204800.pth
|
395 |
+
[2025-07-13 11:37:18,396][07359] Stopping LearnerWorker_p0...
|
396 |
+
[2025-07-13 11:37:18,396][06939] Component LearnerWorker_p0 stopped!
|
397 |
+
[2025-07-13 11:37:18,396][07359] Loop learner_proc0_evt_loop terminating...
|
398 |
+
[2025-07-13 11:37:18,401][06939] Component RolloutWorker_w0 stopped!
|
399 |
+
[2025-07-13 11:37:18,401][07369] Stopping RolloutWorker_w0...
|
400 |
+
[2025-07-13 11:37:18,403][06939] Waiting for process learner_proc0 to stop...
|
401 |
+
[2025-07-13 11:37:18,404][07369] Loop rollout_proc0_evt_loop terminating...
|
402 |
+
[2025-07-13 11:37:19,546][06939] Waiting for process inference_proc0-0 to join...
|
403 |
+
[2025-07-13 11:37:19,549][06939] Waiting for process rollout_proc0 to join...
|
404 |
+
[2025-07-13 11:37:19,919][06939] Waiting for process rollout_proc1 to join...
|
405 |
+
[2025-07-13 11:37:19,921][06939] Batcher 0 profile tree view:
|
406 |
+
batching: 1.1778, releasing_batches: 0.0012
|
407 |
+
[2025-07-13 11:37:19,923][06939] InferenceWorker_p0-w0 profile tree view:
|
408 |
+
wait_policy: 0.0000
|
409 |
+
wait_policy_total: 15.3290
|
410 |
+
update_model: 1.2617
|
411 |
+
weight_update: 0.0014
|
412 |
+
one_step: 0.0035
|
413 |
+
handle_policy_step: 83.1065
|
414 |
+
deserialize: 1.7763, stack: 0.5898, obs_to_device_normalize: 20.5143, forward: 43.8813, send_messages: 2.3863
|
415 |
+
prepare_outputs: 10.0497
|
416 |
+
to_cpu: 6.0774
|
417 |
+
[2025-07-13 11:37:19,924][06939] Learner 0 profile tree view:
|
418 |
+
misc: 0.0002, prepare_batch: 1.8202
|
419 |
+
train: 5.3572
|
420 |
+
epoch_init: 0.0002, minibatch_init: 0.0003, losses_postprocess: 0.0286, kl_divergence: 0.0488, after_optimizer: 1.7859
|
421 |
+
calculate_losses: 1.6856
|
422 |
+
losses_init: 0.0002, forward_head: 0.4117, bptt_initial: 0.9225, tail: 0.0585, advantages_returns: 0.0108, losses: 0.1807
|
423 |
+
bptt: 0.0892
|
424 |
+
bptt_forward_core: 0.0844
|
425 |
+
update: 1.7855
|
426 |
+
clip: 0.0915
|
427 |
+
[2025-07-13 11:37:19,925][06939] RolloutWorker_w0 profile tree view:
|
428 |
+
wait_for_trajectories: 0.0519, enqueue_policy_requests: 12.9735, env_step: 38.3118, overhead: 1.9176, complete_rollouts: 0.5191
|
429 |
+
save_policy_outputs: 2.2653
|
430 |
+
split_output_tensors: 0.9149
|
431 |
+
[2025-07-13 11:37:19,926][06939] RolloutWorker_w1 profile tree view:
|
432 |
+
wait_for_trajectories: 0.0577, enqueue_policy_requests: 12.6762, env_step: 38.4079, overhead: 1.8928, complete_rollouts: 0.3994
|
433 |
+
save_policy_outputs: 2.4013
|
434 |
+
split_output_tensors: 0.9375
|
435 |
+
[2025-07-13 11:37:19,927][06939] Loop Runner_EvtLoop terminating...
|
436 |
+
[2025-07-13 11:37:19,930][06939] Runner profile tree view:
|
437 |
+
main_loop: 115.4634
|
438 |
+
[2025-07-13 11:37:19,931][06939] Collected {0: 204800}, FPS: 1773.7
|
439 |
+
[2025-07-13 11:37:19,942][06939] Environment doom_basic already registered, overwriting...
|
440 |
+
[2025-07-13 11:37:19,943][06939] Environment doom_two_colors_easy already registered, overwriting...
|
441 |
+
[2025-07-13 11:37:19,944][06939] Environment doom_two_colors_hard already registered, overwriting...
|
442 |
+
[2025-07-13 11:37:19,945][06939] Environment doom_dm already registered, overwriting...
|
443 |
+
[2025-07-13 11:37:19,946][06939] Environment doom_dwango5 already registered, overwriting...
|
444 |
+
[2025-07-13 11:37:19,947][06939] Environment doom_my_way_home_flat_actions already registered, overwriting...
|
445 |
+
[2025-07-13 11:37:19,947][06939] Environment doom_defend_the_center_flat_actions already registered, overwriting...
|
446 |
+
[2025-07-13 11:37:19,948][06939] Environment doom_my_way_home already registered, overwriting...
|
447 |
+
[2025-07-13 11:37:19,952][06939] Environment doom_deadly_corridor already registered, overwriting...
|
448 |
+
[2025-07-13 11:37:19,953][06939] Environment doom_defend_the_center already registered, overwriting...
|
449 |
+
[2025-07-13 11:37:19,954][06939] Environment doom_defend_the_line already registered, overwriting...
|
450 |
+
[2025-07-13 11:37:19,955][06939] Environment doom_health_gathering already registered, overwriting...
|
451 |
+
[2025-07-13 11:37:19,956][06939] Environment doom_health_gathering_supreme already registered, overwriting...
|
452 |
+
[2025-07-13 11:37:19,956][06939] Environment doom_battle already registered, overwriting...
|
453 |
+
[2025-07-13 11:37:19,957][06939] Environment doom_battle2 already registered, overwriting...
|
454 |
+
[2025-07-13 11:37:19,958][06939] Environment doom_duel_bots already registered, overwriting...
|
455 |
+
[2025-07-13 11:37:19,959][06939] Environment doom_deathmatch_bots already registered, overwriting...
|
456 |
+
[2025-07-13 11:37:19,959][06939] Environment doom_duel already registered, overwriting...
|
457 |
+
[2025-07-13 11:37:19,960][06939] Environment doom_deathmatch_full already registered, overwriting...
|
458 |
+
[2025-07-13 11:37:19,961][06939] Environment doom_benchmark already registered, overwriting...
|
459 |
+
[2025-07-13 11:37:19,962][06939] register_encoder_factory: <function make_vizdoom_encoder at 0x7f16fee9a340>
|
460 |
+
[2025-07-13 11:39:55,913][06939] Loading existing experiment configuration from ./train_dir/vizdoom_exp/config.json
|
461 |
+
[2025-07-13 11:39:55,914][06939] Adding new argument 'no_render'=False that is not in the saved config file!
|
462 |
+
[2025-07-13 11:39:55,915][06939] Adding new argument 'save_video'=True that is not in the saved config file!
|
463 |
+
[2025-07-13 11:39:55,916][06939] Adding new argument 'video_frames'=1000000000.0 that is not in the saved config file!
|
464 |
+
[2025-07-13 11:39:55,917][06939] Adding new argument 'video_name'=None that is not in the saved config file!
|
465 |
+
[2025-07-13 11:39:55,917][06939] Adding new argument 'max_num_frames'=1000000000.0 that is not in the saved config file!
|
466 |
+
[2025-07-13 11:39:55,918][06939] Adding new argument 'max_num_episodes'=5 that is not in the saved config file!
|
467 |
+
[2025-07-13 11:39:55,919][06939] Adding new argument 'push_to_hub'=False that is not in the saved config file!
|
468 |
+
[2025-07-13 11:39:55,920][06939] Adding new argument 'hf_repository'=None that is not in the saved config file!
|
469 |
+
[2025-07-13 11:39:55,921][06939] Adding new argument 'policy_index'=0 that is not in the saved config file!
|
470 |
+
[2025-07-13 11:39:55,922][06939] Adding new argument 'eval_deterministic'=False that is not in the saved config file!
|
471 |
+
[2025-07-13 11:39:55,922][06939] Adding new argument 'train_script'=None that is not in the saved config file!
|
472 |
+
[2025-07-13 11:39:55,923][06939] Adding new argument 'enjoy_script'=None that is not in the saved config file!
|
473 |
+
[2025-07-13 11:39:55,924][06939] Using frameskip 1 and render_action_repeat=4 for evaluation
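
"frameskip 1 and render_action_repeat=4" means the engine renders every frame for the video while each policy action is still held for 4 frames, preserving the effective control rate used during training. A rough sketch of that idea with a Gym-style step API (this wrapper is illustrative, not sample-factory's implementation):

```
class RenderActionRepeat:
    """Repeat each policy action for `repeat` rendered env steps (sketch).

    Rewards are summed over the repeated steps, matching frameskip
    semantics while still producing one video frame per step.
    """

    def __init__(self, env, repeat=4):
        self.env = env
        self.repeat = repeat

    def step(self, action):
        total_reward = 0.0
        for _ in range(self.repeat):
            obs, reward, terminated, truncated, info = self.env.step(action)
            total_reward += reward
            if terminated or truncated:
                break
        return obs, total_reward, terminated, truncated, info
```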
[2025-07-13 11:39:55,953][06939] Doom resolution: 160x120, resize resolution: (128, 72)
[2025-07-13 11:39:55,957][06939] RunningMeanStd input shape: (3, 72, 128)
[2025-07-13 11:39:55,959][06939] RunningMeanStd input shape: (1,)
[2025-07-13 11:39:55,974][06939] ConvEncoder: input_channels=3
[2025-07-13 11:39:56,088][06939] Conv encoder output size: 512
[2025-07-13 11:39:56,090][06939] Policy head output size: 512
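
The two RunningMeanStd modules track running statistics for the (3, 72, 128) image observations and for scalar returns, so inputs can be normalized online as data streams in. A minimal NumPy sketch of the standard parallel mean/variance update such a normalizer typically uses (simplified relative to the real sample-factory module):

```
import numpy as np

class RunningMeanStd:
    """Running mean/variance of a stream of batches (parallel-update sketch)."""

    def __init__(self, shape, epsilon=1e-4):
        self.mean = np.zeros(shape, dtype=np.float64)
        self.var = np.ones(shape, dtype=np.float64)
        self.count = epsilon  # avoids division by zero before the first update

    def update(self, batch):
        batch_mean = batch.mean(axis=0)
        batch_var = batch.var(axis=0)
        batch_count = batch.shape[0]

        delta = batch_mean - self.mean
        total = self.count + batch_count
        # Chan et al. parallel combination of (mean, var, count) pairs.
        self.mean = self.mean + delta * batch_count / total
        m2 = self.var * self.count + batch_var * batch_count \
             + delta**2 * self.count * batch_count / total
        self.var = m2 / total
        self.count = total

    def normalize(self, x):
        return (x - self.mean) / np.sqrt(self.var + 1e-8)
```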
[2025-07-13 11:39:56,298][06939] Loading state from checkpoint ./train_dir/vizdoom_exp/checkpoint_p0/checkpoint_000000053_217088.pth...
[2025-07-13 11:39:56,305][06939] Could not load from checkpoint, attempt 0
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/sample_factory/algo/learning/learner.py", line 281, in load_checkpoint
    checkpoint_dict = torch.load(latest_checkpoint, map_location=device)
                      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/torch/serialization.py", line 1470, in load
    raise pickle.UnpicklingError(_get_wo_message(str(e))) from None
_pickle.UnpicklingError: Weights only load failed. This file can still be loaded, to do so you have two options, do those steps only if you trust the source of the checkpoint.
(1) In PyTorch 2.6, we changed the default value of the `weights_only` argument in `torch.load` from `False` to `True`. Re-running `torch.load` with `weights_only` set to `False` will likely succeed, but it can result in arbitrary code execution. Do it only if you got the file from a trusted source.
(2) Alternatively, to load with `weights_only=True` please check the recommended steps in the following error message.
WeightsUnpickler error: Unsupported global: GLOBAL numpy.core.multiarray.scalar was not an allowed global by default. Please use `torch.serialization.add_safe_globals([scalar])` or the `torch.serialization.safe_globals([scalar])` context manager to allowlist this global if you trust this class/function.

Check the documentation of torch.load to learn more about types accepted by default with weights_only https://pytorch.org/docs/stable/generated/torch.load.html.
[2025-07-13 11:39:56,308][06939] Loading state from checkpoint ./train_dir/vizdoom_exp/checkpoint_p0/checkpoint_000000053_217088.pth...
[2025-07-13 11:39:56,310][06939] Could not load from checkpoint, attempt 1
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/sample_factory/algo/learning/learner.py", line 281, in load_checkpoint
    checkpoint_dict = torch.load(latest_checkpoint, map_location=device)
                      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/torch/serialization.py", line 1470, in load
    raise pickle.UnpicklingError(_get_wo_message(str(e))) from None
_pickle.UnpicklingError: Weights only load failed. This file can still be loaded, to do so you have two options, do those steps only if you trust the source of the checkpoint.
(1) In PyTorch 2.6, we changed the default value of the `weights_only` argument in `torch.load` from `False` to `True`. Re-running `torch.load` with `weights_only` set to `False` will likely succeed, but it can result in arbitrary code execution. Do it only if you got the file from a trusted source.
(2) Alternatively, to load with `weights_only=True` please check the recommended steps in the following error message.
WeightsUnpickler error: Unsupported global: GLOBAL numpy.core.multiarray.scalar was not an allowed global by default. Please use `torch.serialization.add_safe_globals([scalar])` or the `torch.serialization.safe_globals([scalar])` context manager to allowlist this global if you trust this class/function.

Check the documentation of torch.load to learn more about types accepted by default with weights_only https://pytorch.org/docs/stable/generated/torch.load.html.
[2025-07-13 11:39:56,311][06939] Loading state from checkpoint ./train_dir/vizdoom_exp/checkpoint_p0/checkpoint_000000053_217088.pth...
[2025-07-13 11:39:56,313][06939] Could not load from checkpoint, attempt 2
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/sample_factory/algo/learning/learner.py", line 281, in load_checkpoint
    checkpoint_dict = torch.load(latest_checkpoint, map_location=device)
                      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/torch/serialization.py", line 1470, in load
    raise pickle.UnpicklingError(_get_wo_message(str(e))) from None
_pickle.UnpicklingError: Weights only load failed. This file can still be loaded, to do so you have two options, do those steps only if you trust the source of the checkpoint.
(1) In PyTorch 2.6, we changed the default value of the `weights_only` argument in `torch.load` from `False` to `True`. Re-running `torch.load` with `weights_only` set to `False` will likely succeed, but it can result in arbitrary code execution. Do it only if you got the file from a trusted source.
(2) Alternatively, to load with `weights_only=True` please check the recommended steps in the following error message.
WeightsUnpickler error: Unsupported global: GLOBAL numpy.core.multiarray.scalar was not an allowed global by default. Please use `torch.serialization.add_safe_globals([scalar])` or the `torch.serialization.safe_globals([scalar])` context manager to allowlist this global if you trust this class/function.

Check the documentation of torch.load to learn more about types accepted by default with weights_only https://pytorch.org/docs/stable/generated/torch.load.html.
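
All three load attempts fail for the same reason: PyTorch 2.6 changed the default of `torch.load` to `weights_only=True`, and this checkpoint pickles a `numpy.core.multiarray.scalar`, which is not on the default safe-globals allowlist. For a checkpoint you trust (here, one trained in this very run), either option from the error message works; a minimal sketch (the numpy import path is an assumption — on NumPy 2.x it may live under `numpy._core.multiarray` instead):

```
import torch

ckpt = "./train_dir/vizdoom_exp/checkpoint_p0/checkpoint_000000053_217088.pth"

# Option 1: disable the safe unpickler entirely -- only for trusted files,
# since full pickle loading can execute arbitrary code.
checkpoint = torch.load(ckpt, map_location="cpu", weights_only=False)

# Option 2: keep weights_only=True and allowlist just the offending global.
# Assumption: this import path works on the installed NumPy version.
from numpy.core.multiarray import scalar

with torch.serialization.safe_globals([scalar]):
    checkpoint = torch.load(ckpt, map_location="cpu")
```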
[2025-07-13 11:41:57,536][06939] Loading existing experiment configuration from ./train_dir/vizdoom_exp/config.json
[2025-07-13 11:41:57,537][06939] Adding new argument 'no_render'=False that is not in the saved config file!
[2025-07-13 11:41:57,539][06939] Adding new argument 'save_video'=True that is not in the saved config file!
[2025-07-13 11:41:57,543][06939] Adding new argument 'video_frames'=1000000000.0 that is not in the saved config file!
[2025-07-13 11:41:57,547][06939] Adding new argument 'video_name'=None that is not in the saved config file!
[2025-07-13 11:41:57,548][06939] Adding new argument 'max_num_frames'=1000000000.0 that is not in the saved config file!
[2025-07-13 11:41:57,549][06939] Adding new argument 'max_num_episodes'=5 that is not in the saved config file!
[2025-07-13 11:41:57,550][06939] Adding new argument 'push_to_hub'=False that is not in the saved config file!
[2025-07-13 11:41:57,551][06939] Adding new argument 'hf_repository'=None that is not in the saved config file!
[2025-07-13 11:41:57,552][06939] Adding new argument 'policy_index'=0 that is not in the saved config file!
[2025-07-13 11:41:57,553][06939] Adding new argument 'eval_deterministic'=False that is not in the saved config file!
[2025-07-13 11:41:57,554][06939] Adding new argument 'train_script'=None that is not in the saved config file!
[2025-07-13 11:41:57,555][06939] Adding new argument 'enjoy_script'=None that is not in the saved config file!
[2025-07-13 11:41:57,556][06939] Using frameskip 1 and render_action_repeat=4 for evaluation
[2025-07-13 11:41:57,602][06939] RunningMeanStd input shape: (3, 72, 128)
[2025-07-13 11:41:57,603][06939] RunningMeanStd input shape: (1,)
[2025-07-13 11:41:57,618][06939] ConvEncoder: input_channels=3
[2025-07-13 11:41:57,675][06939] Conv encoder output size: 512
[2025-07-13 11:41:57,676][06939] Policy head output size: 512
[2025-07-13 11:42:44,481][06939] Loading existing experiment configuration from ./train_dir/vizdoom_exp/config.json
[2025-07-13 11:42:44,483][06939] Adding new argument 'no_render'=False that is not in the saved config file!
[2025-07-13 11:42:44,484][06939] Adding new argument 'save_video'=True that is not in the saved config file!
[2025-07-13 11:42:44,486][06939] Adding new argument 'video_frames'=1000000000.0 that is not in the saved config file!
[2025-07-13 11:42:44,489][06939] Adding new argument 'video_name'=None that is not in the saved config file!
[2025-07-13 11:42:44,489][06939] Adding new argument 'max_num_frames'=1000000000.0 that is not in the saved config file!
[2025-07-13 11:42:44,491][06939] Adding new argument 'max_num_episodes'=5 that is not in the saved config file!
[2025-07-13 11:42:44,492][06939] Adding new argument 'push_to_hub'=False that is not in the saved config file!
[2025-07-13 11:42:44,492][06939] Adding new argument 'hf_repository'=None that is not in the saved config file!
[2025-07-13 11:42:44,493][06939] Adding new argument 'policy_index'=0 that is not in the saved config file!
[2025-07-13 11:42:44,494][06939] Adding new argument 'eval_deterministic'=False that is not in the saved config file!
[2025-07-13 11:42:44,494][06939] Adding new argument 'train_script'=None that is not in the saved config file!
[2025-07-13 11:42:44,499][06939] Adding new argument 'enjoy_script'=None that is not in the saved config file!
[2025-07-13 11:42:44,501][06939] Using frameskip 1 and render_action_repeat=4 for evaluation
[2025-07-13 11:42:44,558][06939] RunningMeanStd input shape: (3, 72, 128)
[2025-07-13 11:42:44,560][06939] RunningMeanStd input shape: (1,)
[2025-07-13 11:42:44,577][06939] ConvEncoder: input_channels=3
[2025-07-13 11:42:44,631][06939] Conv encoder output size: 512
[2025-07-13 11:42:44,633][06939] Policy head output size: 512
[2025-07-13 11:43:21,556][06939] Loading existing experiment configuration from ./train_dir/vizdoom_exp/config.json
[2025-07-13 11:43:21,558][06939] Adding new argument 'no_render'=True that is not in the saved config file!
[2025-07-13 11:43:21,559][06939] Adding new argument 'save_video'=True that is not in the saved config file!
[2025-07-13 11:43:21,560][06939] Adding new argument 'video_frames'=1000000000.0 that is not in the saved config file!
[2025-07-13 11:43:21,562][06939] Adding new argument 'video_name'=None that is not in the saved config file!
[2025-07-13 11:43:21,563][06939] Adding new argument 'max_num_frames'=1000000000.0 that is not in the saved config file!
[2025-07-13 11:43:21,564][06939] Adding new argument 'max_num_episodes'=5 that is not in the saved config file!
[2025-07-13 11:43:21,565][06939] Adding new argument 'push_to_hub'=False that is not in the saved config file!
[2025-07-13 11:43:21,567][06939] Adding new argument 'hf_repository'=None that is not in the saved config file!
[2025-07-13 11:43:21,568][06939] Adding new argument 'policy_index'=0 that is not in the saved config file!
[2025-07-13 11:43:21,569][06939] Adding new argument 'eval_deterministic'=False that is not in the saved config file!
[2025-07-13 11:43:21,570][06939] Adding new argument 'train_script'=None that is not in the saved config file!
[2025-07-13 11:43:21,571][06939] Adding new argument 'enjoy_script'=None that is not in the saved config file!
[2025-07-13 11:43:21,572][06939] Using frameskip 1 and render_action_repeat=4 for evaluation
[2025-07-13 11:43:21,603][06939] RunningMeanStd input shape: (3, 72, 128)
[2025-07-13 11:43:21,604][06939] RunningMeanStd input shape: (1,)
[2025-07-13 11:43:21,615][06939] ConvEncoder: input_channels=3
[2025-07-13 11:43:21,654][06939] Conv encoder output size: 512
[2025-07-13 11:43:21,657][06939] Policy head output size: 512
[2025-07-13 11:43:22,337][06939] Num frames 100...
[2025-07-13 11:43:22,472][06939] Num frames 200...
[2025-07-13 11:43:22,604][06939] Num frames 300...
[2025-07-13 11:43:22,737][06939] Num frames 400...
[2025-07-13 11:43:22,814][06939] Avg episode rewards: #0: 5.160, true rewards: #0: 4.160
[2025-07-13 11:43:22,815][06939] Avg episode reward: 5.160, avg true_objective: 4.160
[2025-07-13 11:43:22,936][06939] Num frames 500...
[2025-07-13 11:43:23,072][06939] Num frames 600...
[2025-07-13 11:43:23,202][06939] Num frames 700...
[2025-07-13 11:43:23,349][06939] Avg episode rewards: #0: 4.840, true rewards: #0: 3.840
[2025-07-13 11:43:23,350][06939] Avg episode reward: 4.840, avg true_objective: 3.840
[2025-07-13 11:43:23,395][06939] Num frames 800...
[2025-07-13 11:43:23,523][06939] Num frames 900...
[2025-07-13 11:43:23,655][06939] Num frames 1000...
[2025-07-13 11:43:23,790][06939] Num frames 1100...
[2025-07-13 11:43:23,911][06939] Avg episode rewards: #0: 4.507, true rewards: #0: 3.840
[2025-07-13 11:43:23,912][06939] Avg episode reward: 4.507, avg true_objective: 3.840
[2025-07-13 11:43:23,984][06939] Num frames 1200...
[2025-07-13 11:43:24,109][06939] Num frames 1300...
[2025-07-13 11:43:24,240][06939] Num frames 1400...
[2025-07-13 11:43:24,368][06939] Num frames 1500...
[2025-07-13 11:43:24,473][06939] Avg episode rewards: #0: 4.340, true rewards: #0: 3.840
[2025-07-13 11:43:24,474][06939] Avg episode reward: 4.340, avg true_objective: 3.840
[2025-07-13 11:43:24,569][06939] Num frames 1600...
[2025-07-13 11:43:24,702][06939] Num frames 1700...
[2025-07-13 11:43:24,892][06939] Avg episode rewards: #0: 3.992, true rewards: #0: 3.592
[2025-07-13 11:43:24,893][06939] Avg episode reward: 3.992, avg true_objective: 3.592
[2025-07-13 11:43:36,753][06939] Replay video saved to ./train_dir/vizdoom_exp/replay.mp4!
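
Each "Avg episode rewards" line above is the running mean over the episodes finished so far, printed once per completed episode; unwinding the averages gives per-episode rewards of roughly 5.16, 4.52, 3.84, 3.84, and 2.60 (the "true rewards" column appears to track the raw environment objective, which can differ from the shaped reward). A small sketch of that bookkeeping, with hypothetical names rather than the actual enjoy-script internals:

```
episode_rewards = []  # hypothetical bookkeeping, mirroring the log above

def on_episode_end(reward):
    """Record a finished episode and return the running mean so far."""
    episode_rewards.append(reward)
    return sum(episode_rewards) / len(episode_rewards)

# Reproducing the averages printed above:
for r in [5.16, 4.52, 3.84, 3.84, 2.60]:
    print(f"Avg episode reward: {on_episode_end(r):.3f}")
# -> 5.160, 4.840, 4.507, 4.340, 3.992
```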
[2025-07-13 11:49:27,400][06939] Loading existing experiment configuration from ./train_dir/vizdoom_exp/config.json
[2025-07-13 11:49:27,401][06939] Overriding arg 'num_workers' with value 1 passed from command line
[2025-07-13 11:49:27,402][06939] Adding new argument 'no_render'=True that is not in the saved config file!
[2025-07-13 11:49:27,403][06939] Adding new argument 'save_video'=True that is not in the saved config file!
[2025-07-13 11:49:27,404][06939] Adding new argument 'video_frames'=1000000000.0 that is not in the saved config file!
[2025-07-13 11:49:27,405][06939] Adding new argument 'video_name'=None that is not in the saved config file!
[2025-07-13 11:49:27,406][06939] Adding new argument 'max_num_frames'=1600 that is not in the saved config file!
[2025-07-13 11:49:27,407][06939] Adding new argument 'max_num_episodes'=5 that is not in the saved config file!
[2025-07-13 11:49:27,408][06939] Adding new argument 'push_to_hub'=True that is not in the saved config file!
[2025-07-13 11:49:27,409][06939] Adding new argument 'hf_repository'='lokeessshhhh/rl_vizdoom_health_gathering' that is not in the saved config file!
[2025-07-13 11:49:27,410][06939] Adding new argument 'policy_index'=0 that is not in the saved config file!
[2025-07-13 11:49:27,412][06939] Adding new argument 'eval_deterministic'=False that is not in the saved config file!
[2025-07-13 11:49:27,413][06939] Adding new argument 'train_script'=None that is not in the saved config file!
[2025-07-13 11:49:27,414][06939] Adding new argument 'enjoy_script'=None that is not in the saved config file!
[2025-07-13 11:49:27,417][06939] Using frameskip 1 and render_action_repeat=4 for evaluation
[2025-07-13 11:49:27,440][06939] RunningMeanStd input shape: (3, 72, 128)
[2025-07-13 11:49:27,442][06939] RunningMeanStd input shape: (1,)
[2025-07-13 11:49:27,452][06939] ConvEncoder: input_channels=3
[2025-07-13 11:49:27,490][06939] Conv encoder output size: 512
[2025-07-13 11:49:27,491][06939] Policy head output size: 512
[2025-07-13 11:49:27,932][06939] Num frames 100...
[2025-07-13 11:49:28,063][06939] Num frames 200...
[2025-07-13 11:49:28,194][06939] Num frames 300...
[2025-07-13 11:49:28,322][06939] Num frames 400...
[2025-07-13 11:49:28,439][06939] Avg episode rewards: #0: 5.480, true rewards: #0: 4.480
[2025-07-13 11:49:28,441][06939] Avg episode reward: 5.480, avg true_objective: 4.480
[2025-07-13 11:49:28,510][06939] Num frames 500...
[2025-07-13 11:49:28,641][06939] Num frames 600...
[2025-07-13 11:49:28,780][06939] Num frames 700...
[2025-07-13 11:49:28,911][06939] Num frames 800...
[2025-07-13 11:49:29,008][06939] Avg episode rewards: #0: 4.660, true rewards: #0: 4.160
[2025-07-13 11:49:29,009][06939] Avg episode reward: 4.660, avg true_objective: 4.160
[2025-07-13 11:49:29,107][06939] Num frames 900...
[2025-07-13 11:49:29,241][06939] Num frames 1000...
[2025-07-13 11:49:29,372][06939] Num frames 1100...
[2025-07-13 11:49:29,503][06939] Num frames 1200...
[2025-07-13 11:49:29,580][06939] Avg episode rewards: #0: 4.387, true rewards: #0: 4.053
[2025-07-13 11:49:29,581][06939] Avg episode reward: 4.387, avg true_objective: 4.053
[2025-07-13 11:49:29,694][06939] Num frames 1300...
[2025-07-13 11:49:29,843][06939] Num frames 1400...
[2025-07-13 11:49:29,979][06939] Num frames 1500...
[2025-07-13 11:49:30,108][06939] Num frames 1600...
[2025-07-13 11:49:39,419][06939] Replay video saved to ./train_dir/vizdoom_exp/replay.mp4!
[2025-07-13 11:49:50,742][06939] The model has been pushed to https://huggingface.co/lokeessshhhh/rl_vizdoom_health_gathering
[2025-07-13 11:56:39,097][06939] Loading existing experiment configuration from ./train_dir/vizdoom_exp/config.json
[2025-07-13 11:56:39,099][06939] Overriding arg 'num_workers' with value 1 passed from command line
[2025-07-13 11:56:39,101][06939] Adding new argument 'no_render'=True that is not in the saved config file!
[2025-07-13 11:56:39,102][06939] Adding new argument 'save_video'=True that is not in the saved config file!
[2025-07-13 11:56:39,103][06939] Adding new argument 'video_frames'=1000000000.0 that is not in the saved config file!
[2025-07-13 11:56:39,105][06939] Adding new argument 'video_name'=None that is not in the saved config file!
[2025-07-13 11:56:39,105][06939] Adding new argument 'max_num_frames'=1600 that is not in the saved config file!
[2025-07-13 11:56:39,108][06939] Adding new argument 'max_num_episodes'=5 that is not in the saved config file!
[2025-07-13 11:56:39,109][06939] Adding new argument 'push_to_hub'=True that is not in the saved config file!
[2025-07-13 11:56:39,109][06939] Adding new argument 'hf_repository'='lokeessshhhh/rl_vizdoom_health_gathering' that is not in the saved config file!
[2025-07-13 11:56:39,110][06939] Adding new argument 'policy_index'=0 that is not in the saved config file!
[2025-07-13 11:56:39,114][06939] Adding new argument 'eval_deterministic'=False that is not in the saved config file!
[2025-07-13 11:56:39,115][06939] Adding new argument 'train_script'=None that is not in the saved config file!
[2025-07-13 11:56:39,115][06939] Adding new argument 'enjoy_script'=None that is not in the saved config file!
[2025-07-13 11:56:39,118][06939] Using frameskip 1 and render_action_repeat=4 for evaluation
[2025-07-13 11:56:39,161][06939] RunningMeanStd input shape: (3, 72, 128)
[2025-07-13 11:56:39,164][06939] RunningMeanStd input shape: (1,)
[2025-07-13 11:56:39,181][06939] ConvEncoder: input_channels=3
[2025-07-13 11:56:39,235][06939] Conv encoder output size: 512
[2025-07-13 11:56:39,236][06939] Policy head output size: 512
[2025-07-13 11:56:39,919][06939] Num frames 100...
[2025-07-13 11:56:40,107][06939] Num frames 200...
[2025-07-13 11:56:40,244][06939] Num frames 300...
[2025-07-13 11:56:40,408][06939] Avg episode rewards: #0: 3.840, true rewards: #0: 3.840
[2025-07-13 11:56:40,409][06939] Avg episode reward: 3.840, avg true_objective: 3.840
[2025-07-13 11:56:40,432][06939] Num frames 400...
[2025-07-13 11:56:40,557][06939] Num frames 500...
[2025-07-13 11:56:40,695][06939] Num frames 600...
[2025-07-13 11:56:40,823][06939] Num frames 700...
[2025-07-13 11:56:40,969][06939] Avg episode rewards: #0: 3.840, true rewards: #0: 3.840
[2025-07-13 11:56:40,970][06939] Avg episode reward: 3.840, avg true_objective: 3.840
[2025-07-13 11:56:41,012][06939] Num frames 800...
[2025-07-13 11:56:41,135][06939] Num frames 900...
[2025-07-13 11:56:41,266][06939] Num frames 1000...
[2025-07-13 11:56:41,352][06939] Avg episode rewards: #0: 3.413, true rewards: #0: 3.413
[2025-07-13 11:56:41,353][06939] Avg episode reward: 3.413, avg true_objective: 3.413
[2025-07-13 11:56:41,448][06939] Num frames 1100...
[2025-07-13 11:56:41,573][06939] Num frames 1200...
[2025-07-13 11:56:41,707][06939] Num frames 1300...
[2025-07-13 11:56:41,833][06939] Num frames 1400...
[2025-07-13 11:56:41,897][06939] Avg episode rewards: #0: 3.520, true rewards: #0: 3.520
[2025-07-13 11:56:41,899][06939] Avg episode reward: 3.520, avg true_objective: 3.520
[2025-07-13 11:56:42,019][06939] Num frames 1500...
[2025-07-13 11:56:42,146][06939] Num frames 1600...
[2025-07-13 11:56:50,594][06939] Replay video saved to ./train_dir/vizdoom_exp/replay.mp4!