Upload folder using huggingface_hub

Files changed:
- .summary/0/events.out.tfevents.1739294520.ffa6c6e41717 (+3, -0)
- README.md (+1, -1)
- checkpoint_p0/best_000003787_15511552_reward_35.659.pth (+3, -0)
- checkpoint_p0/checkpoint_000003898_15966208.pth (+3, -0)
- checkpoint_p0/checkpoint_000003908_16007168.pth (+3, -0)
- config.json (+1, -1)
- replay.mp4 (+2, -2)
- sf_log.txt (+1108, -0)
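A commit like this is typically produced by a single `upload_folder` call. A minimal sketch, assuming the experiment directory and repo id that appear in sf_log.txt below:

```python
from huggingface_hub import HfApi

# Minimal sketch of the upload that would produce a commit like this one.
# The repo id is taken from the push URL in sf_log.txt below; the folder
# path is the experiment dir from the same log (adjust both for your setup).
api = HfApi()
api.upload_folder(
    folder_path="/content/train_dir/default_experiment",
    repo_id="mjm54/doom_health_gathering_supreme",
    repo_type="model",
    commit_message="Upload folder using huggingface_hub",
)
```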
.summary/0/events.out.tfevents.1739294520.ffa6c6e41717
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e9cb4ded56d96d6bd6a77bd748df27953b1349309a9d6bf68f62f486ae26481e
+size 241066
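Each large binary in this commit is stored as a Git LFS pointer like the one above: three lines giving the spec version, the SHA-256 of the actual blob, and its size in bytes. A small sketch for verifying a downloaded blob against its pointer (the local file name is illustrative):

```python
import hashlib
from pathlib import Path

def verify_lfs_blob(path: str, expected_oid: str, expected_size: int) -> bool:
    """Check a downloaded file against the oid/size from its LFS pointer."""
    data = Path(path).read_bytes()
    return (
        len(data) == expected_size
        and hashlib.sha256(data).hexdigest() == expected_oid
    )

# Values taken from the pointer above.
print(verify_lfs_blob(
    "events.out.tfevents.1739294520.ffa6c6e41717",
    "e9cb4ded56d96d6bd6a77bd748df27953b1349309a9d6bf68f62f486ae26481e",
    241066,
))
```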
README.md
CHANGED
@@ -15,7 +15,7 @@ model-index:
       type: doom_health_gathering_supreme
     metrics:
     - type: mean_reward
-      value:
+      value: 12.91 +/- 6.70
       name: mean_reward
       verified: false
 ---
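The new `value` reads as mean ± standard deviation of episode reward over the evaluation episodes run before the push (here 12.91 +/- 6.70 on doom_health_gathering_supreme). A sketch of the convention only; the per-episode rewards below are made up, not the actual evaluation data:

```python
import statistics

# Hypothetical per-episode rewards; the real evaluation data is not in this commit.
episode_rewards = [10.2, 18.7, 6.3, 14.9, 21.5, 9.8, 4.1, 16.0, 12.4, 15.2]

mean = statistics.mean(episode_rewards)
std = statistics.pstdev(episode_rewards)
print(f"value: {mean:.2f} +/- {std:.2f}")  # same format as the model card field
```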
checkpoint_p0/best_000003787_15511552_reward_35.659.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:10f4f20800ab4c7ad06a759e1dfeda56af6a4ae3964a45919517e787426e8632
+size 34929243
checkpoint_p0/checkpoint_000003898_15966208.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1a362f21e65481b2ccf40af622a98c9c348fc15117c322cbde51884bc6d77972
+size 34929669
checkpoint_p0/checkpoint_000003908_16007168.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:93ffe5860d7fbab2107dd88a0965fc6bae78756188512effc5f6efb6a59c8833
+size 34929669
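The checkpoint file names encode the policy version and env-step count (e.g. `checkpoint_000003908_16007168.pth` is policy version 3908 at 16,007,168 env steps), matching the `Saving .../checkpoint_...pth` lines in sf_log.txt below. A hedged sketch for inspecting one locally; beyond the `train_step`/`env_steps` fields implied by the log's "Loaded experiment state" line, the exact dictionary layout is an assumption:

```python
import torch

# Sample Factory checkpoints are torch-serialized dicts; load on CPU to inspect.
state = torch.load(
    "checkpoint_p0/checkpoint_000003908_16007168.pth",
    map_location="cpu",
)
print(sorted(state.keys()))
# The log's "Loaded experiment state at self.train_step=... self.env_steps=..."
# suggests at least these two fields (assumed key names):
print(state.get("train_step"), state.get("env_steps"))
```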
config.json
CHANGED
@@ -65,7 +65,7 @@
     "summaries_use_frameskip": true,
     "heartbeat_interval": 20,
     "heartbeat_reporting_interval": 600,
-    "train_for_env_steps":
+    "train_for_env_steps": 16000000,
     "train_for_seconds": 10000000000,
     "save_every_sec": 120,
    "keep_checkpoints": 2,
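This is the only config change: the training budget is raised to 16M env steps. The log below shows the same value arriving as a command-line override ("Overriding arg 'train_for_env_steps' with value 16000000 passed from command line") when the experiment is resumed. A minimal sketch of making the equivalent edit directly, using the path from the log:

```python
import json
from pathlib import Path

# Experiment config path taken from sf_log.txt; adjust for your own train_dir.
cfg_path = Path("/content/train_dir/default_experiment/config.json")

cfg = json.loads(cfg_path.read_text())
cfg["train_for_env_steps"] = 16_000_000  # the new value in this commit
cfg_path.write_text(json.dumps(cfg, indent=4))
```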
replay.mp4
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:cf6c0323b2e2ce78c1db868428c4feda0f0a46349f2436b6696e8a915f5d63d2
+size 24516655
sf_log.txt
CHANGED
@@ -1880,3 +1880,1111 @@ main_loop: 183.8825
 [2025-02-11 17:19:29,046][02117] Avg episode rewards: #0: 23.822, true rewards: #0: 9.922
 [2025-02-11 17:19:29,047][02117] Avg episode reward: 23.822, avg true_objective: 9.922
 [2025-02-11 17:19:52,814][02117] Replay video saved to /content/train_dir/default_experiment/replay.mp4!
+[2025-02-11 17:20:02,963][02117] The model has been pushed to https://huggingface.co/mjm54/doom_health_gathering_supreme
+[2025-02-11 17:22:00,187][02117] Environment doom_basic already registered, overwriting...
+[2025-02-11 17:22:00,191][02117] Environment doom_two_colors_easy already registered, overwriting...
+[2025-02-11 17:22:00,191][02117] Environment doom_two_colors_hard already registered, overwriting...
+[2025-02-11 17:22:00,194][02117] Environment doom_dm already registered, overwriting...
+[2025-02-11 17:22:00,195][02117] Environment doom_dwango5 already registered, overwriting...
+[2025-02-11 17:22:00,197][02117] Environment doom_my_way_home_flat_actions already registered, overwriting...
+[2025-02-11 17:22:00,198][02117] Environment doom_defend_the_center_flat_actions already registered, overwriting...
+[2025-02-11 17:22:00,199][02117] Environment doom_my_way_home already registered, overwriting...
+[2025-02-11 17:22:00,200][02117] Environment doom_deadly_corridor already registered, overwriting...
+[2025-02-11 17:22:00,201][02117] Environment doom_defend_the_center already registered, overwriting...
+[2025-02-11 17:22:00,202][02117] Environment doom_defend_the_line already registered, overwriting...
+[2025-02-11 17:22:00,206][02117] Environment doom_health_gathering already registered, overwriting...
+[2025-02-11 17:22:00,207][02117] Environment doom_health_gathering_supreme already registered, overwriting...
+[2025-02-11 17:22:00,209][02117] Environment doom_battle already registered, overwriting...
+[2025-02-11 17:22:00,210][02117] Environment doom_battle2 already registered, overwriting...
+[2025-02-11 17:22:00,212][02117] Environment doom_duel_bots already registered, overwriting...
+[2025-02-11 17:22:00,213][02117] Environment doom_deathmatch_bots already registered, overwriting...
+[2025-02-11 17:22:00,215][02117] Environment doom_duel already registered, overwriting...
+[2025-02-11 17:22:00,216][02117] Environment doom_deathmatch_full already registered, overwriting...
+[2025-02-11 17:22:00,218][02117] Environment doom_benchmark already registered, overwriting...
+[2025-02-11 17:22:00,219][02117] register_encoder_factory: <function make_vizdoom_encoder at 0x7da2c5ac6660>
+[2025-02-11 17:22:00,227][02117] Loading existing experiment configuration from /content/train_dir/default_experiment/config.json
+[2025-02-11 17:22:00,229][02117] Overriding arg 'train_for_env_steps' with value 16000000 passed from command line
+[2025-02-11 17:22:00,234][02117] Experiment dir /content/train_dir/default_experiment already exists!
+[2025-02-11 17:22:00,235][02117] Resuming existing experiment from /content/train_dir/default_experiment...
+[2025-02-11 17:22:00,237][02117] Weights and Biases integration disabled
+[2025-02-11 17:22:00,240][02117] Environment var CUDA_VISIBLE_DEVICES is 0
+
+[2025-02-11 17:22:02,431][02117] Starting experiment with the following configuration:
+help=False
+algo=APPO
+env=doom_health_gathering_supreme
+experiment=default_experiment
+train_dir=/content/train_dir
+restart_behavior=resume
+device=gpu
+seed=None
+num_policies=1
+async_rl=True
+serial_mode=False
+batched_sampling=False
+num_batches_to_accumulate=2
+worker_num_splits=2
+policy_workers_per_policy=1
+max_policy_lag=1000
+num_workers=10
+num_envs_per_worker=4
+batch_size=1024
+num_batches_per_epoch=1
+num_epochs=1
+rollout=32
+recurrence=32
+shuffle_minibatches=False
+gamma=0.99
+reward_scale=1.0
+reward_clip=1000.0
+value_bootstrap=False
+normalize_returns=True
+exploration_loss_coeff=0.001
+value_loss_coeff=0.5
+kl_loss_coeff=0.0
+exploration_loss=symmetric_kl
+gae_lambda=0.95
+ppo_clip_ratio=0.1
+ppo_clip_value=0.2
+with_vtrace=False
+vtrace_rho=1.0
+vtrace_c=1.0
+optimizer=adam
+adam_eps=1e-06
+adam_beta1=0.9
+adam_beta2=0.999
+max_grad_norm=4.0
+learning_rate=0.0001
+lr_schedule=constant
+lr_schedule_kl_threshold=0.008
+lr_adaptive_min=1e-06
+lr_adaptive_max=0.01
+obs_subtract_mean=0.0
+obs_scale=255.0
+normalize_input=True
+normalize_input_keys=None
+decorrelate_experience_max_seconds=0
+decorrelate_envs_on_one_worker=True
+actor_worker_gpus=[]
+set_workers_cpu_affinity=True
+force_envs_single_thread=False
+default_niceness=0
+log_to_file=True
+experiment_summaries_interval=10
+flush_summaries_interval=30
+stats_avg=100
+summaries_use_frameskip=True
+heartbeat_interval=20
+heartbeat_reporting_interval=600
+train_for_env_steps=16000000
+train_for_seconds=10000000000
+save_every_sec=120
+keep_checkpoints=2
+load_checkpoint_kind=latest
+save_milestones_sec=-1
+save_best_every_sec=5
+save_best_metric=reward
+save_best_after=100000
+benchmark=False
+encoder_mlp_layers=[512, 512]
+encoder_conv_architecture=convnet_simple
+encoder_conv_mlp_layers=[512]
+use_rnn=True
+rnn_size=512
+rnn_type=gru
+rnn_num_layers=1
+decoder_mlp_layers=[]
+nonlinearity=elu
+policy_initialization=orthogonal
+policy_init_gain=1.0
+actor_critic_share_weights=True
+adaptive_stddev=True
+continuous_tanh_scale=0.0
+initial_stddev=1.0
+use_env_info_cache=False
+env_gpu_actions=False
+env_gpu_observations=True
+env_frameskip=4
+env_framestack=1
+pixel_format=CHW
+use_record_episode_statistics=False
+with_wandb=False
+wandb_user=None
+wandb_project=sample_factory
+wandb_group=None
+wandb_job_type=SF
+wandb_tags=[]
+with_pbt=False
+pbt_mix_policies_in_one_env=True
+pbt_period_env_steps=5000000
+pbt_start_mutation=20000000
+pbt_replace_fraction=0.3
+pbt_mutation_rate=0.15
+pbt_replace_reward_gap=0.1
+pbt_replace_reward_gap_absolute=1e-06
+pbt_optimize_gamma=False
+pbt_target_objective=true_objective
+pbt_perturb_min=1.1
+pbt_perturb_max=1.5
+num_agents=-1
+num_humans=0
+num_bots=-1
+start_bot_difficulty=None
+timelimit=None
+res_w=128
+res_h=72
+wide_aspect_ratio=False
+eval_env_frameskip=1
+fps=35
+command_line=--env=doom_health_gathering_supreme --num_workers=8 --num_envs_per_worker=4 --train_for_env_steps=4000000
+cli_args={'env': 'doom_health_gathering_supreme', 'num_workers': 8, 'num_envs_per_worker': 4, 'train_for_env_steps': 4000000}
+git_hash=unknown
+git_repo_name=not a git repository
2043 |
+
[2025-02-11 17:22:02,433][02117] Saving configuration to /content/train_dir/default_experiment/config.json...
|
2044 |
+
[2025-02-11 17:22:02,436][02117] Rollout worker 0 uses device cpu
|
2045 |
+
[2025-02-11 17:22:02,436][02117] Rollout worker 1 uses device cpu
|
2046 |
+
[2025-02-11 17:22:02,437][02117] Rollout worker 2 uses device cpu
|
2047 |
+
[2025-02-11 17:22:02,438][02117] Rollout worker 3 uses device cpu
|
2048 |
+
[2025-02-11 17:22:02,440][02117] Rollout worker 4 uses device cpu
|
2049 |
+
[2025-02-11 17:22:02,441][02117] Rollout worker 5 uses device cpu
|
2050 |
+
[2025-02-11 17:22:02,443][02117] Rollout worker 6 uses device cpu
|
2051 |
+
[2025-02-11 17:22:02,444][02117] Rollout worker 7 uses device cpu
|
2052 |
+
[2025-02-11 17:22:02,445][02117] Rollout worker 8 uses device cpu
|
2053 |
+
[2025-02-11 17:22:02,446][02117] Rollout worker 9 uses device cpu
|
2054 |
+
[2025-02-11 17:22:02,496][02117] Using GPUs [0] for process 0 (actually maps to GPUs [0])
|
2055 |
+
[2025-02-11 17:22:02,497][02117] InferenceWorker_p0-w0: min num requests: 3
|
2056 |
+
[2025-02-11 17:22:02,536][02117] Starting all processes...
|
2057 |
+
[2025-02-11 17:22:02,537][02117] Starting process learner_proc0
|
2058 |
+
[2025-02-11 17:22:02,590][02117] Starting all processes...
|
2059 |
+
[2025-02-11 17:22:02,594][02117] Starting process inference_proc0-0
|
2060 |
+
[2025-02-11 17:22:02,595][02117] Starting process rollout_proc0
|
2061 |
+
[2025-02-11 17:22:02,595][02117] Starting process rollout_proc1
|
2062 |
+
[2025-02-11 17:22:02,595][02117] Starting process rollout_proc2
|
2063 |
+
[2025-02-11 17:22:02,598][02117] Starting process rollout_proc3
|
2064 |
+
[2025-02-11 17:22:02,601][02117] Starting process rollout_proc4
|
2065 |
+
[2025-02-11 17:22:02,602][02117] Starting process rollout_proc5
|
2066 |
+
[2025-02-11 17:22:02,602][02117] Starting process rollout_proc6
|
2067 |
+
[2025-02-11 17:22:02,603][02117] Starting process rollout_proc7
|
2068 |
+
[2025-02-11 17:22:02,604][02117] Starting process rollout_proc8
|
2069 |
+
[2025-02-11 17:22:02,608][02117] Starting process rollout_proc9
|
2070 |
+
[2025-02-11 17:22:05,761][12745] Worker 3 uses CPU cores [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
|
2071 |
+
[2025-02-11 17:22:05,787][12741] Worker 0 uses CPU cores [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
|
2072 |
+
[2025-02-11 17:22:05,792][12725] Using GPUs [0] for process 0 (actually maps to GPUs [0])
|
2073 |
+
[2025-02-11 17:22:05,792][12725] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for learning process 0
|
2074 |
+
[2025-02-11 17:22:05,812][12725] Num visible devices: 1
|
2075 |
+
[2025-02-11 17:22:05,831][12744] Worker 4 uses CPU cores [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
|
2076 |
+
[2025-02-11 17:22:05,855][12746] Worker 5 uses CPU cores [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
|
2077 |
+
[2025-02-11 17:22:05,860][12743] Worker 2 uses CPU cores [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
|
2078 |
+
[2025-02-11 17:22:05,873][12725] Starting seed is not provided
|
2079 |
+
[2025-02-11 17:22:05,873][12725] Using GPUs [0] for process 0 (actually maps to GPUs [0])
|
2080 |
+
[2025-02-11 17:22:05,873][12725] Initializing actor-critic model on device cuda:0
|
2081 |
+
[2025-02-11 17:22:05,874][12725] RunningMeanStd input shape: (3, 72, 128)
|
2082 |
+
[2025-02-11 17:22:05,875][12725] RunningMeanStd input shape: (1,)
|
2083 |
+
[2025-02-11 17:22:05,894][12725] ConvEncoder: input_channels=3
|
2084 |
+
[2025-02-11 17:22:05,896][12750] Worker 9 uses CPU cores [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
|
2085 |
+
[2025-02-11 17:22:05,900][12742] Worker 1 uses CPU cores [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
|
2086 |
+
[2025-02-11 17:22:05,906][12748] Worker 7 uses CPU cores [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
|
2087 |
+
[2025-02-11 17:22:05,916][12749] Worker 8 uses CPU cores [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
|
2088 |
+
[2025-02-11 17:22:05,923][12740] Using GPUs [0] for process 0 (actually maps to GPUs [0])
|
2089 |
+
[2025-02-11 17:22:05,924][12740] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for inference process 0
|
2090 |
+
[2025-02-11 17:22:05,924][12747] Worker 6 uses CPU cores [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
|
2091 |
+
[2025-02-11 17:22:05,945][12740] Num visible devices: 1
|
2092 |
+
[2025-02-11 17:22:06,045][12725] Conv encoder output size: 512
|
2093 |
+
[2025-02-11 17:22:06,046][12725] Policy head output size: 512
|
2094 |
+
[2025-02-11 17:22:06,062][12725] Created Actor Critic model with architecture:
|
2095 |
+
[2025-02-11 17:22:06,062][12725] ActorCriticSharedWeights(
|
2096 |
+
(obs_normalizer): ObservationNormalizer(
|
2097 |
+
(running_mean_std): RunningMeanStdDictInPlace(
|
2098 |
+
(running_mean_std): ModuleDict(
|
2099 |
+
(obs): RunningMeanStdInPlace()
|
2100 |
+
)
|
2101 |
+
)
|
2102 |
+
)
|
2103 |
+
(returns_normalizer): RecursiveScriptModule(original_name=RunningMeanStdInPlace)
|
2104 |
+
(encoder): VizdoomEncoder(
|
2105 |
+
(basic_encoder): ConvEncoder(
|
2106 |
+
(enc): RecursiveScriptModule(
|
2107 |
+
original_name=ConvEncoderImpl
|
2108 |
+
(conv_head): RecursiveScriptModule(
|
2109 |
+
original_name=Sequential
|
2110 |
+
(0): RecursiveScriptModule(original_name=Conv2d)
|
2111 |
+
(1): RecursiveScriptModule(original_name=ELU)
|
2112 |
+
(2): RecursiveScriptModule(original_name=Conv2d)
|
2113 |
+
(3): RecursiveScriptModule(original_name=ELU)
|
2114 |
+
(4): RecursiveScriptModule(original_name=Conv2d)
|
2115 |
+
(5): RecursiveScriptModule(original_name=ELU)
|
2116 |
+
)
|
2117 |
+
(mlp_layers): RecursiveScriptModule(
|
2118 |
+
original_name=Sequential
|
2119 |
+
(0): RecursiveScriptModule(original_name=Linear)
|
2120 |
+
(1): RecursiveScriptModule(original_name=ELU)
|
2121 |
+
)
|
2122 |
+
)
|
2123 |
+
)
|
2124 |
+
)
|
2125 |
+
(core): ModelCoreRNN(
|
2126 |
+
(core): GRU(512, 512)
|
2127 |
+
)
|
2128 |
+
(decoder): MlpDecoder(
|
2129 |
+
(mlp): Identity()
|
2130 |
+
)
|
2131 |
+
(critic_linear): Linear(in_features=512, out_features=1, bias=True)
|
2132 |
+
(action_parameterization): ActionParameterizationDefault(
|
2133 |
+
(distribution_linear): Linear(in_features=512, out_features=5, bias=True)
|
2134 |
+
)
|
2135 |
+
)
|
2136 |
+
[2025-02-11 17:22:06,161][12725] Using optimizer <class 'torch.optim.adam.Adam'>
|
2137 |
+
[2025-02-11 17:22:07,069][12725] Loading state from checkpoint /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001955_8007680.pth...
|
2138 |
+
[2025-02-11 17:22:07,100][12725] Loading model from checkpoint
|
2139 |
+
[2025-02-11 17:22:07,101][12725] Loaded experiment state at self.train_step=1955, self.env_steps=8007680
|
2140 |
+
[2025-02-11 17:22:07,101][12725] Initialized policy 0 weights for model version 1955
|
2141 |
+
[2025-02-11 17:22:07,103][12725] LearnerWorker_p0 finished initialization!
|
2142 |
+
[2025-02-11 17:22:07,103][12725] Using GPUs [0] for process 0 (actually maps to GPUs [0])
|
2143 |
+
[2025-02-11 17:22:07,180][12740] RunningMeanStd input shape: (3, 72, 128)
|
2144 |
+
[2025-02-11 17:22:07,181][12740] RunningMeanStd input shape: (1,)
|
2145 |
+
[2025-02-11 17:22:07,193][12740] ConvEncoder: input_channels=3
|
2146 |
+
[2025-02-11 17:22:07,297][12740] Conv encoder output size: 512
|
2147 |
+
[2025-02-11 17:22:07,297][12740] Policy head output size: 512
|
2148 |
+
[2025-02-11 17:22:07,332][02117] Inference worker 0-0 is ready!
|
2149 |
+
[2025-02-11 17:22:07,334][02117] All inference workers are ready! Signal rollout workers to start!
|
2150 |
+
[2025-02-11 17:22:07,368][12745] Doom resolution: 160x120, resize resolution: (128, 72)
|
2151 |
+
[2025-02-11 17:22:07,369][12743] Doom resolution: 160x120, resize resolution: (128, 72)
|
2152 |
+
[2025-02-11 17:22:07,388][12750] Doom resolution: 160x120, resize resolution: (128, 72)
|
2153 |
+
[2025-02-11 17:22:07,389][12749] Doom resolution: 160x120, resize resolution: (128, 72)
|
2154 |
+
[2025-02-11 17:22:07,389][12741] Doom resolution: 160x120, resize resolution: (128, 72)
|
2155 |
+
[2025-02-11 17:22:07,390][12748] Doom resolution: 160x120, resize resolution: (128, 72)
|
2156 |
+
[2025-02-11 17:22:07,390][12746] Doom resolution: 160x120, resize resolution: (128, 72)
|
2157 |
+
[2025-02-11 17:22:07,390][12747] Doom resolution: 160x120, resize resolution: (128, 72)
|
2158 |
+
[2025-02-11 17:22:07,390][12744] Doom resolution: 160x120, resize resolution: (128, 72)
|
2159 |
+
[2025-02-11 17:22:07,391][12742] Doom resolution: 160x120, resize resolution: (128, 72)
|
2160 |
+
[2025-02-11 17:22:07,665][12743] Decorrelating experience for 0 frames...
|
2161 |
+
[2025-02-11 17:22:07,665][12745] Decorrelating experience for 0 frames...
|
2162 |
+
[2025-02-11 17:22:07,678][12749] Decorrelating experience for 0 frames...
|
2163 |
+
[2025-02-11 17:22:07,687][12748] Decorrelating experience for 0 frames...
|
2164 |
+
[2025-02-11 17:22:07,688][12741] Decorrelating experience for 0 frames...
|
2165 |
+
[2025-02-11 17:22:07,931][12745] Decorrelating experience for 32 frames...
|
2166 |
+
[2025-02-11 17:22:07,934][12743] Decorrelating experience for 32 frames...
|
2167 |
+
[2025-02-11 17:22:07,950][12749] Decorrelating experience for 32 frames...
|
2168 |
+
[2025-02-11 17:22:07,960][12748] Decorrelating experience for 32 frames...
|
2169 |
+
[2025-02-11 17:22:07,963][12744] Decorrelating experience for 0 frames...
|
2170 |
+
[2025-02-11 17:22:07,964][12741] Decorrelating experience for 32 frames...
|
2171 |
+
[2025-02-11 17:22:08,232][12744] Decorrelating experience for 32 frames...
|
2172 |
+
[2025-02-11 17:22:08,234][12747] Decorrelating experience for 0 frames...
|
2173 |
+
[2025-02-11 17:22:08,262][12750] Decorrelating experience for 0 frames...
|
2174 |
+
[2025-02-11 17:22:08,306][12745] Decorrelating experience for 64 frames...
|
2175 |
+
[2025-02-11 17:22:08,326][12748] Decorrelating experience for 64 frames...
|
2176 |
+
[2025-02-11 17:22:08,502][12746] Decorrelating experience for 0 frames...
|
2177 |
+
[2025-02-11 17:22:08,522][12741] Decorrelating experience for 64 frames...
|
2178 |
+
[2025-02-11 17:22:08,561][12744] Decorrelating experience for 64 frames...
|
2179 |
+
[2025-02-11 17:22:08,580][12750] Decorrelating experience for 32 frames...
|
2180 |
+
[2025-02-11 17:22:08,628][12748] Decorrelating experience for 96 frames...
|
2181 |
+
[2025-02-11 17:22:08,845][12746] Decorrelating experience for 32 frames...
|
2182 |
+
[2025-02-11 17:22:08,846][12742] Decorrelating experience for 0 frames...
|
2183 |
+
[2025-02-11 17:22:08,846][12749] Decorrelating experience for 64 frames...
|
2184 |
+
[2025-02-11 17:22:08,956][12741] Decorrelating experience for 96 frames...
|
2185 |
+
[2025-02-11 17:22:08,969][12747] Decorrelating experience for 32 frames...
|
2186 |
+
[2025-02-11 17:22:09,118][12742] Decorrelating experience for 32 frames...
|
2187 |
+
[2025-02-11 17:22:09,180][12750] Decorrelating experience for 64 frames...
|
2188 |
+
[2025-02-11 17:22:09,281][12744] Decorrelating experience for 96 frames...
|
2189 |
+
[2025-02-11 17:22:09,444][12745] Decorrelating experience for 96 frames...
|
2190 |
+
[2025-02-11 17:22:09,450][12747] Decorrelating experience for 64 frames...
|
2191 |
+
[2025-02-11 17:22:09,557][12742] Decorrelating experience for 64 frames...
|
2192 |
+
[2025-02-11 17:22:09,589][12750] Decorrelating experience for 96 frames...
|
2193 |
+
[2025-02-11 17:22:09,676][12749] Decorrelating experience for 96 frames...
|
2194 |
+
[2025-02-11 17:22:09,776][12743] Decorrelating experience for 64 frames...
|
2195 |
+
[2025-02-11 17:22:09,825][12747] Decorrelating experience for 96 frames...
|
2196 |
+
[2025-02-11 17:22:10,042][12746] Decorrelating experience for 64 frames...
|
2197 |
+
[2025-02-11 17:22:10,160][12743] Decorrelating experience for 96 frames...
|
2198 |
+
[2025-02-11 17:22:10,240][02117] Fps is (10 sec: nan, 60 sec: nan, 300 sec: nan). Total num frames: 8007680. Throughput: 0: nan. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
|
2199 |
+
[2025-02-11 17:22:10,245][02117] Avg episode reward: [(0, '4.653')]
|
2200 |
+
[2025-02-11 17:22:10,256][12725] Signal inference workers to stop experience collection...
|
2201 |
+
[2025-02-11 17:22:10,261][12740] InferenceWorker_p0-w0: stopping experience collection
|
2202 |
+
[2025-02-11 17:22:10,367][12742] Decorrelating experience for 96 frames...
|
2203 |
+
[2025-02-11 17:22:10,403][12746] Decorrelating experience for 96 frames...
|
2204 |
+
[2025-02-11 17:22:11,293][12725] Signal inference workers to resume experience collection...
|
2205 |
+
[2025-02-11 17:22:11,294][12740] InferenceWorker_p0-w0: resuming experience collection
|
2206 |
+
[2025-02-11 17:22:12,982][12740] Updated weights for policy 0, policy_version 1965 (0.0089)
|
2207 |
+
[2025-02-11 17:22:14,747][12740] Updated weights for policy 0, policy_version 1975 (0.0013)
|
2208 |
+
[2025-02-11 17:22:15,240][02117] Fps is (10 sec: 18022.6, 60 sec: 18022.6, 300 sec: 18022.6). Total num frames: 8097792. Throughput: 0: 2612.4. Samples: 13062. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
|
2209 |
+
[2025-02-11 17:22:15,242][02117] Avg episode reward: [(0, '24.351')]
|
2210 |
+
[2025-02-11 17:22:16,544][12740] Updated weights for policy 0, policy_version 1985 (0.0012)
|
2211 |
+
[2025-02-11 17:22:18,328][12740] Updated weights for policy 0, policy_version 1995 (0.0012)
|
2212 |
+
[2025-02-11 17:22:20,120][12740] Updated weights for policy 0, policy_version 2005 (0.0013)
|
2213 |
+
[2025-02-11 17:22:20,240][02117] Fps is (10 sec: 20480.0, 60 sec: 20480.0, 300 sec: 20480.0). Total num frames: 8212480. Throughput: 0: 4771.8. Samples: 47718. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
|
2214 |
+
[2025-02-11 17:22:20,243][02117] Avg episode reward: [(0, '34.533')]
|
2215 |
+
[2025-02-11 17:22:20,264][12725] Saving new best policy, reward=34.533!
|
2216 |
+
[2025-02-11 17:22:21,982][12740] Updated weights for policy 0, policy_version 2015 (0.0013)
|
2217 |
+
[2025-02-11 17:22:22,488][02117] Heartbeat connected on Batcher_0
|
2218 |
+
[2025-02-11 17:22:22,492][02117] Heartbeat connected on LearnerWorker_p0
|
2219 |
+
[2025-02-11 17:22:22,502][02117] Heartbeat connected on InferenceWorker_p0-w0
|
2220 |
+
[2025-02-11 17:22:22,505][02117] Heartbeat connected on RolloutWorker_w0
|
2221 |
+
[2025-02-11 17:22:22,510][02117] Heartbeat connected on RolloutWorker_w2
|
2222 |
+
[2025-02-11 17:22:22,513][02117] Heartbeat connected on RolloutWorker_w1
|
2223 |
+
[2025-02-11 17:22:22,515][02117] Heartbeat connected on RolloutWorker_w3
|
2224 |
+
[2025-02-11 17:22:22,518][02117] Heartbeat connected on RolloutWorker_w4
|
2225 |
+
[2025-02-11 17:22:22,524][02117] Heartbeat connected on RolloutWorker_w5
|
2226 |
+
[2025-02-11 17:22:22,526][02117] Heartbeat connected on RolloutWorker_w6
|
2227 |
+
[2025-02-11 17:22:22,531][02117] Heartbeat connected on RolloutWorker_w7
|
2228 |
+
[2025-02-11 17:22:22,534][02117] Heartbeat connected on RolloutWorker_w8
|
2229 |
+
[2025-02-11 17:22:22,536][02117] Heartbeat connected on RolloutWorker_w9
|
2230 |
+
[2025-02-11 17:22:23,888][12740] Updated weights for policy 0, policy_version 2025 (0.0012)
|
2231 |
+
[2025-02-11 17:22:25,240][02117] Fps is (10 sec: 22527.4, 60 sec: 21025.8, 300 sec: 21025.8). Total num frames: 8323072. Throughput: 0: 4280.7. Samples: 64212. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
|
2232 |
+
[2025-02-11 17:22:25,243][02117] Avg episode reward: [(0, '25.262')]
|
2233 |
+
[2025-02-11 17:22:25,646][12740] Updated weights for policy 0, policy_version 2035 (0.0012)
|
2234 |
+
[2025-02-11 17:22:27,367][12740] Updated weights for policy 0, policy_version 2045 (0.0012)
|
2235 |
+
[2025-02-11 17:22:29,259][12740] Updated weights for policy 0, policy_version 2055 (0.0012)
|
2236 |
+
[2025-02-11 17:22:30,240][02117] Fps is (10 sec: 22528.1, 60 sec: 21504.0, 300 sec: 21504.0). Total num frames: 8437760. Throughput: 0: 4926.0. Samples: 98520. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
|
2237 |
+
[2025-02-11 17:22:30,242][02117] Avg episode reward: [(0, '30.272')]
|
2238 |
+
[2025-02-11 17:22:31,060][12740] Updated weights for policy 0, policy_version 2065 (0.0012)
|
2239 |
+
[2025-02-11 17:22:32,816][12740] Updated weights for policy 0, policy_version 2075 (0.0012)
|
2240 |
+
[2025-02-11 17:22:34,670][12740] Updated weights for policy 0, policy_version 2085 (0.0012)
|
2241 |
+
[2025-02-11 17:22:35,240][02117] Fps is (10 sec: 22528.5, 60 sec: 21626.9, 300 sec: 21626.9). Total num frames: 8548352. Throughput: 0: 5287.4. Samples: 132184. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
|
2242 |
+
[2025-02-11 17:22:35,243][02117] Avg episode reward: [(0, '32.111')]
|
2243 |
+
[2025-02-11 17:22:36,522][12740] Updated weights for policy 0, policy_version 2095 (0.0012)
|
2244 |
+
[2025-02-11 17:22:38,243][12740] Updated weights for policy 0, policy_version 2105 (0.0013)
|
2245 |
+
[2025-02-11 17:22:40,005][12740] Updated weights for policy 0, policy_version 2115 (0.0012)
|
2246 |
+
[2025-02-11 17:22:40,240][02117] Fps is (10 sec: 22937.6, 60 sec: 21981.9, 300 sec: 21981.9). Total num frames: 8667136. Throughput: 0: 4968.4. Samples: 149052. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
|
2247 |
+
[2025-02-11 17:22:40,242][02117] Avg episode reward: [(0, '28.859')]
|
2248 |
+
[2025-02-11 17:22:41,777][12740] Updated weights for policy 0, policy_version 2125 (0.0012)
|
2249 |
+
[2025-02-11 17:22:43,523][12740] Updated weights for policy 0, policy_version 2135 (0.0012)
|
2250 |
+
[2025-02-11 17:22:45,240][02117] Fps is (10 sec: 23347.3, 60 sec: 22118.4, 300 sec: 22118.4). Total num frames: 8781824. Throughput: 0: 5256.2. Samples: 183966. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
|
2251 |
+
[2025-02-11 17:22:45,242][02117] Avg episode reward: [(0, '29.565')]
|
2252 |
+
[2025-02-11 17:22:45,300][12740] Updated weights for policy 0, policy_version 2145 (0.0012)
|
2253 |
+
[2025-02-11 17:22:47,067][12740] Updated weights for policy 0, policy_version 2155 (0.0012)
|
2254 |
+
[2025-02-11 17:22:48,968][12740] Updated weights for policy 0, policy_version 2165 (0.0012)
|
2255 |
+
[2025-02-11 17:22:50,240][02117] Fps is (10 sec: 22937.6, 60 sec: 22220.8, 300 sec: 22220.8). Total num frames: 8896512. Throughput: 0: 5445.4. Samples: 217818. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0)
|
2256 |
+
[2025-02-11 17:22:50,242][02117] Avg episode reward: [(0, '26.417')]
|
2257 |
+
[2025-02-11 17:22:50,734][12740] Updated weights for policy 0, policy_version 2175 (0.0012)
|
2258 |
+
[2025-02-11 17:22:52,483][12740] Updated weights for policy 0, policy_version 2185 (0.0012)
|
2259 |
+
[2025-02-11 17:22:54,226][12740] Updated weights for policy 0, policy_version 2195 (0.0012)
|
2260 |
+
[2025-02-11 17:22:55,240][02117] Fps is (10 sec: 22937.6, 60 sec: 22300.5, 300 sec: 22300.5). Total num frames: 9011200. Throughput: 0: 5230.8. Samples: 235386. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
|
2261 |
+
[2025-02-11 17:22:55,243][02117] Avg episode reward: [(0, '29.656')]
|
2262 |
+
[2025-02-11 17:22:55,982][12740] Updated weights for policy 0, policy_version 2205 (0.0012)
|
2263 |
+
[2025-02-11 17:22:57,759][12740] Updated weights for policy 0, policy_version 2215 (0.0012)
|
2264 |
+
[2025-02-11 17:22:59,524][12740] Updated weights for policy 0, policy_version 2225 (0.0013)
|
2265 |
+
[2025-02-11 17:23:00,240][02117] Fps is (10 sec: 22937.5, 60 sec: 22364.1, 300 sec: 22364.1). Total num frames: 9125888. Throughput: 0: 5722.5. Samples: 270576. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
|
2266 |
+
[2025-02-11 17:23:00,243][02117] Avg episode reward: [(0, '29.065')]
|
2267 |
+
[2025-02-11 17:23:01,394][12740] Updated weights for policy 0, policy_version 2235 (0.0012)
|
2268 |
+
[2025-02-11 17:23:03,194][12740] Updated weights for policy 0, policy_version 2245 (0.0012)
|
2269 |
+
[2025-02-11 17:23:04,946][12740] Updated weights for policy 0, policy_version 2255 (0.0012)
|
2270 |
+
[2025-02-11 17:23:05,240][02117] Fps is (10 sec: 22937.6, 60 sec: 22416.3, 300 sec: 22416.3). Total num frames: 9240576. Throughput: 0: 5701.6. Samples: 304290. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
|
2271 |
+
[2025-02-11 17:23:05,242][02117] Avg episode reward: [(0, '26.903')]
|
2272 |
+
[2025-02-11 17:23:06,717][12740] Updated weights for policy 0, policy_version 2265 (0.0012)
|
2273 |
+
[2025-02-11 17:23:08,490][12740] Updated weights for policy 0, policy_version 2275 (0.0012)
|
2274 |
+
[2025-02-11 17:23:10,236][12740] Updated weights for policy 0, policy_version 2285 (0.0012)
|
2275 |
+
[2025-02-11 17:23:10,240][02117] Fps is (10 sec: 23347.2, 60 sec: 22528.0, 300 sec: 22528.0). Total num frames: 9359360. Throughput: 0: 5720.6. Samples: 321640. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0)
|
2276 |
+
[2025-02-11 17:23:10,242][02117] Avg episode reward: [(0, '30.326')]
|
2277 |
+
[2025-02-11 17:23:11,999][12740] Updated weights for policy 0, policy_version 2295 (0.0012)
|
2278 |
+
[2025-02-11 17:23:13,780][12740] Updated weights for policy 0, policy_version 2305 (0.0012)
|
2279 |
+
[2025-02-11 17:23:15,240][02117] Fps is (10 sec: 22937.5, 60 sec: 22869.3, 300 sec: 22496.5). Total num frames: 9469952. Throughput: 0: 5737.8. Samples: 356722. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
|
2280 |
+
[2025-02-11 17:23:15,243][02117] Avg episode reward: [(0, '30.082')]
|
2281 |
+
[2025-02-11 17:23:15,651][12740] Updated weights for policy 0, policy_version 2315 (0.0012)
|
2282 |
+
[2025-02-11 17:23:17,395][12740] Updated weights for policy 0, policy_version 2325 (0.0013)
|
2283 |
+
[2025-02-11 17:23:19,180][12740] Updated weights for policy 0, policy_version 2335 (0.0012)
|
2284 |
+
[2025-02-11 17:23:20,240][02117] Fps is (10 sec: 22937.7, 60 sec: 22937.6, 300 sec: 22586.5). Total num frames: 9588736. Throughput: 0: 5748.8. Samples: 390878. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
|
2285 |
+
[2025-02-11 17:23:20,242][02117] Avg episode reward: [(0, '27.262')]
|
2286 |
+
[2025-02-11 17:23:20,925][12740] Updated weights for policy 0, policy_version 2345 (0.0013)
|
2287 |
+
[2025-02-11 17:23:22,635][12740] Updated weights for policy 0, policy_version 2355 (0.0012)
|
2288 |
+
[2025-02-11 17:23:24,389][12740] Updated weights for policy 0, policy_version 2365 (0.0013)
|
2289 |
+
[2025-02-11 17:23:25,240][02117] Fps is (10 sec: 23347.3, 60 sec: 23006.0, 300 sec: 22609.9). Total num frames: 9703424. Throughput: 0: 5768.0. Samples: 408614. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
|
2290 |
+
[2025-02-11 17:23:25,243][02117] Avg episode reward: [(0, '26.114')]
|
2291 |
+
[2025-02-11 17:23:26,214][12740] Updated weights for policy 0, policy_version 2375 (0.0012)
|
2292 |
+
[2025-02-11 17:23:28,133][12740] Updated weights for policy 0, policy_version 2385 (0.0012)
|
2293 |
+
[2025-02-11 17:23:29,929][12740] Updated weights for policy 0, policy_version 2395 (0.0012)
|
2294 |
+
[2025-02-11 17:23:30,240][02117] Fps is (10 sec: 22527.9, 60 sec: 22937.6, 300 sec: 22579.2). Total num frames: 9814016. Throughput: 0: 5744.3. Samples: 442460. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
|
2295 |
+
[2025-02-11 17:23:30,243][02117] Avg episode reward: [(0, '26.835')]
|
2296 |
+
[2025-02-11 17:23:31,721][12740] Updated weights for policy 0, policy_version 2405 (0.0012)
|
2297 |
+
[2025-02-11 17:23:33,459][12740] Updated weights for policy 0, policy_version 2415 (0.0012)
|
2298 |
+
[2025-02-11 17:23:35,215][12740] Updated weights for policy 0, policy_version 2425 (0.0012)
|
2299 |
+
[2025-02-11 17:23:35,240][02117] Fps is (10 sec: 22937.6, 60 sec: 23074.1, 300 sec: 22648.5). Total num frames: 9932800. Throughput: 0: 5761.3. Samples: 477078. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0)
|
2300 |
+
[2025-02-11 17:23:35,242][02117] Avg episode reward: [(0, '29.854')]
|
2301 |
+
[2025-02-11 17:23:36,977][12740] Updated weights for policy 0, policy_version 2435 (0.0012)
|
2302 |
+
[2025-02-11 17:23:38,755][12740] Updated weights for policy 0, policy_version 2445 (0.0012)
|
2303 |
+
[2025-02-11 17:23:40,240][02117] Fps is (10 sec: 23347.2, 60 sec: 23005.9, 300 sec: 22664.5). Total num frames: 10047488. Throughput: 0: 5759.3. Samples: 494554. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0)
|
2304 |
+
[2025-02-11 17:23:40,243][02117] Avg episode reward: [(0, '30.698')]
|
2305 |
+
[2025-02-11 17:23:40,640][12740] Updated weights for policy 0, policy_version 2455 (0.0013)
|
2306 |
+
[2025-02-11 17:23:42,455][12740] Updated weights for policy 0, policy_version 2465 (0.0012)
|
2307 |
+
[2025-02-11 17:23:44,200][12740] Updated weights for policy 0, policy_version 2475 (0.0012)
|
2308 |
+
[2025-02-11 17:23:45,240][02117] Fps is (10 sec: 22937.7, 60 sec: 23005.9, 300 sec: 22678.9). Total num frames: 10162176. Throughput: 0: 5730.2. Samples: 528434. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
|
2309 |
+
[2025-02-11 17:23:45,243][02117] Avg episode reward: [(0, '28.478')]
|
2310 |
+
[2025-02-11 17:23:45,948][12740] Updated weights for policy 0, policy_version 2485 (0.0012)
|
2311 |
+
[2025-02-11 17:23:47,702][12740] Updated weights for policy 0, policy_version 2495 (0.0012)
|
2312 |
+
[2025-02-11 17:23:49,472][12740] Updated weights for policy 0, policy_version 2505 (0.0013)
|
2313 |
+
[2025-02-11 17:23:50,240][02117] Fps is (10 sec: 22937.8, 60 sec: 23005.9, 300 sec: 22691.9). Total num frames: 10276864. Throughput: 0: 5759.1. Samples: 563450. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
|
2314 |
+
[2025-02-11 17:23:50,243][02117] Avg episode reward: [(0, '28.311')]
|
2315 |
+
[2025-02-11 17:23:51,217][12740] Updated weights for policy 0, policy_version 2515 (0.0012)
|
2316 |
+
[2025-02-11 17:23:53,041][12740] Updated weights for policy 0, policy_version 2525 (0.0012)
|
2317 |
+
[2025-02-11 17:23:54,944][12740] Updated weights for policy 0, policy_version 2535 (0.0012)
|
2318 |
+
[2025-02-11 17:23:55,240][02117] Fps is (10 sec: 22528.0, 60 sec: 22937.6, 300 sec: 22664.5). Total num frames: 10387456. Throughput: 0: 5757.4. Samples: 580722. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
|
2319 |
+
[2025-02-11 17:23:55,243][02117] Avg episode reward: [(0, '26.855')]
|
2320 |
+
[2025-02-11 17:23:56,680][12740] Updated weights for policy 0, policy_version 2545 (0.0012)
|
2321 |
+
[2025-02-11 17:23:58,455][12740] Updated weights for policy 0, policy_version 2555 (0.0013)
|
2322 |
+
[2025-02-11 17:24:00,183][12740] Updated weights for policy 0, policy_version 2565 (0.0012)
|
2323 |
+
[2025-02-11 17:24:00,240][02117] Fps is (10 sec: 22937.4, 60 sec: 23005.9, 300 sec: 22714.2). Total num frames: 10506240. Throughput: 0: 5729.5. Samples: 614550. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0)
|
2324 |
+
[2025-02-11 17:24:00,242][02117] Avg episode reward: [(0, '30.195')]
|
2325 |
+
[2025-02-11 17:24:00,249][12725] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002565_10506240.pth...
|
2326 |
+
[2025-02-11 17:24:00,326][12725] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001591_6516736.pth
|
2327 |
+
[2025-02-11 17:24:01,964][12740] Updated weights for policy 0, policy_version 2575 (0.0012)
|
2328 |
+
[2025-02-11 17:24:03,709][12740] Updated weights for policy 0, policy_version 2585 (0.0011)
|
2329 |
+
[2025-02-11 17:24:05,240][02117] Fps is (10 sec: 23347.3, 60 sec: 23005.9, 300 sec: 22723.9). Total num frames: 10620928. Throughput: 0: 5746.7. Samples: 649480. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0)
|
2330 |
+
[2025-02-11 17:24:05,243][02117] Avg episode reward: [(0, '29.347')]
|
2331 |
+
[2025-02-11 17:24:05,505][12740] Updated weights for policy 0, policy_version 2595 (0.0012)
|
2332 |
+
[2025-02-11 17:24:07,417][12740] Updated weights for policy 0, policy_version 2605 (0.0012)
|
2333 |
+
[2025-02-11 17:24:09,201][12740] Updated weights for policy 0, policy_version 2615 (0.0012)
|
2334 |
+
[2025-02-11 17:24:10,241][02117] Fps is (10 sec: 22527.5, 60 sec: 22869.2, 300 sec: 22698.6). Total num frames: 10731520. Throughput: 0: 5714.2. Samples: 665754. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0)
|
2335 |
+
[2025-02-11 17:24:10,243][02117] Avg episode reward: [(0, '28.283')]
|
2336 |
+
[2025-02-11 17:24:10,977][12740] Updated weights for policy 0, policy_version 2625 (0.0012)
|
2337 |
+
[2025-02-11 17:24:12,737][12740] Updated weights for policy 0, policy_version 2635 (0.0012)
|
2338 |
+
[2025-02-11 17:24:14,481][12740] Updated weights for policy 0, policy_version 2645 (0.0012)
|
2339 |
+
[2025-02-11 17:24:15,240][02117] Fps is (10 sec: 22528.1, 60 sec: 22937.6, 300 sec: 22708.2). Total num frames: 10846208. Throughput: 0: 5735.3. Samples: 700548. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
|
2340 |
+
[2025-02-11 17:24:15,242][02117] Avg episode reward: [(0, '29.653')]
|
2341 |
+
[2025-02-11 17:24:16,258][12740] Updated weights for policy 0, policy_version 2655 (0.0013)
|
2342 |
+
[2025-02-11 17:24:18,108][12740] Updated weights for policy 0, policy_version 2665 (0.0012)
|
2343 |
+
[2025-02-11 17:24:20,041][12740] Updated weights for policy 0, policy_version 2675 (0.0012)
|
2344 |
+
[2025-02-11 17:24:20,240][02117] Fps is (10 sec: 22938.3, 60 sec: 22869.3, 300 sec: 22717.1). Total num frames: 10960896. Throughput: 0: 5719.5. Samples: 734454. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
|
2345 |
+
[2025-02-11 17:24:20,242][02117] Avg episode reward: [(0, '32.766')]
|
2346 |
+
[2025-02-11 17:24:21,850][12740] Updated weights for policy 0, policy_version 2685 (0.0012)
|
2347 |
+
[2025-02-11 17:24:23,623][12740] Updated weights for policy 0, policy_version 2695 (0.0012)
|
2348 |
+
[2025-02-11 17:24:25,241][02117] Fps is (10 sec: 22936.3, 60 sec: 22869.1, 300 sec: 22725.1). Total num frames: 11075584. Throughput: 0: 5701.7. Samples: 751134. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
|
2349 |
+
[2025-02-11 17:24:25,243][02117] Avg episode reward: [(0, '32.836')]
|
2350 |
+
[2025-02-11 17:24:25,419][12740] Updated weights for policy 0, policy_version 2705 (0.0012)
|
2351 |
+
[2025-02-11 17:24:27,192][12740] Updated weights for policy 0, policy_version 2715 (0.0012)
|
2352 |
+
[2025-02-11 17:24:28,986][12740] Updated weights for policy 0, policy_version 2725 (0.0012)
|
2353 |
+
[2025-02-11 17:24:30,240][02117] Fps is (10 sec: 22937.5, 60 sec: 22937.6, 300 sec: 22732.8). Total num frames: 11190272. Throughput: 0: 5716.2. Samples: 785662. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
|
2354 |
+
[2025-02-11 17:24:30,243][02117] Avg episode reward: [(0, '30.424')]
|
2355 |
+
[2025-02-11 17:24:30,791][12740] Updated weights for policy 0, policy_version 2735 (0.0012)
|
2356 |
+
[2025-02-11 17:24:32,773][12740] Updated weights for policy 0, policy_version 2745 (0.0013)
|
2357 |
+
[2025-02-11 17:24:34,625][12740] Updated weights for policy 0, policy_version 2755 (0.0013)
|
2358 |
+
[2025-02-11 17:24:35,240][02117] Fps is (10 sec: 22119.6, 60 sec: 22732.8, 300 sec: 22683.4). Total num frames: 11296768. Throughput: 0: 5665.7. Samples: 818408. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
|
2359 |
+
[2025-02-11 17:24:35,243][02117] Avg episode reward: [(0, '29.429')]
|
2360 |
+
[2025-02-11 17:24:36,389][12740] Updated weights for policy 0, policy_version 2765 (0.0013)
|
2361 |
+
[2025-02-11 17:24:38,167][12740] Updated weights for policy 0, policy_version 2775 (0.0012)
|
2362 |
+
[2025-02-11 17:24:39,942][12740] Updated weights for policy 0, policy_version 2785 (0.0013)
|
2363 |
+
[2025-02-11 17:24:40,240][02117] Fps is (10 sec: 22118.3, 60 sec: 22732.8, 300 sec: 22691.8). Total num frames: 11411456. Throughput: 0: 5667.0. Samples: 835738. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
|
2364 |
+
[2025-02-11 17:24:40,243][02117] Avg episode reward: [(0, '30.784')]
|
2365 |
+
[2025-02-11 17:24:41,709][12740] Updated weights for policy 0, policy_version 2795 (0.0012)
|
2366 |
+
[2025-02-11 17:24:43,490][12740] Updated weights for policy 0, policy_version 2805 (0.0012)
|
2367 |
+
[2025-02-11 17:24:45,240][02117] Fps is (10 sec: 22937.5, 60 sec: 22732.8, 300 sec: 22699.8). Total num frames: 11526144. Throughput: 0: 5682.3. Samples: 870252. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
|
2368 |
+
[2025-02-11 17:24:45,242][02117] Avg episode reward: [(0, '28.860')]
|
2369 |
+
[2025-02-11 17:24:45,359][12740] Updated weights for policy 0, policy_version 2815 (0.0012)
|
2370 |
+
[2025-02-11 17:24:47,256][12740] Updated weights for policy 0, policy_version 2825 (0.0012)
|
2371 |
+
[2025-02-11 17:24:49,059][12740] Updated weights for policy 0, policy_version 2835 (0.0012)
|
2372 |
+
[2025-02-11 17:24:50,240][02117] Fps is (10 sec: 22528.0, 60 sec: 22664.5, 300 sec: 22681.6). Total num frames: 11636736. Throughput: 0: 5651.4. Samples: 903792. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
|
2373 |
+
[2025-02-11 17:24:50,243][02117] Avg episode reward: [(0, '29.631')]
|
2374 |
+
[2025-02-11 17:24:50,809][12740] Updated weights for policy 0, policy_version 2845 (0.0012)
|
2375 |
+
[2025-02-11 17:24:52,562][12740] Updated weights for policy 0, policy_version 2855 (0.0013)
|
2376 |
+
[2025-02-11 17:24:54,330][12740] Updated weights for policy 0, policy_version 2865 (0.0013)
|
2377 |
+
[2025-02-11 17:24:55,240][02117] Fps is (10 sec: 22937.8, 60 sec: 22801.1, 300 sec: 22714.2). Total num frames: 11755520. Throughput: 0: 5676.1. Samples: 921176. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
|
2378 |
+
[2025-02-11 17:24:55,243][02117] Avg episode reward: [(0, '31.826')]
|
2379 |
+
[2025-02-11 17:24:56,115][12740] Updated weights for policy 0, policy_version 2875 (0.0012)
|
2380 |
+
[2025-02-11 17:24:57,912][12740] Updated weights for policy 0, policy_version 2885 (0.0012)
|
2381 |
+
[2025-02-11 17:24:59,757][12740] Updated weights for policy 0, policy_version 2895 (0.0013)
|
2382 |
+
[2025-02-11 17:25:00,240][02117] Fps is (10 sec: 22937.7, 60 sec: 22664.5, 300 sec: 22696.7). Total num frames: 11866112. Throughput: 0: 5669.4. Samples: 955672. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
|
2383 |
+
[2025-02-11 17:25:00,242][02117] Avg episode reward: [(0, '31.478')]
|
2384 |
+
[2025-02-11 17:25:01,558][12740] Updated weights for policy 0, policy_version 2905 (0.0012)
|
2385 |
+
[2025-02-11 17:25:03,323][12740] Updated weights for policy 0, policy_version 2915 (0.0012)
|
2386 |
+
[2025-02-11 17:25:05,083][12740] Updated weights for policy 0, policy_version 2925 (0.0012)
|
2387 |
+
[2025-02-11 17:25:05,240][02117] Fps is (10 sec: 22937.5, 60 sec: 22732.8, 300 sec: 22727.0). Total num frames: 11984896. Throughput: 0: 5675.7. Samples: 989860. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0)
|
2388 |
+
[2025-02-11 17:25:05,242][02117] Avg episode reward: [(0, '32.538')]
|
2389 |
+
[2025-02-11 17:25:06,837][12740] Updated weights for policy 0, policy_version 2935 (0.0012)
|
2390 |
+
[2025-02-11 17:25:08,594][12740] Updated weights for policy 0, policy_version 2945 (0.0012)
|
2391 |
+
[2025-02-11 17:25:10,240][02117] Fps is (10 sec: 23347.2, 60 sec: 22801.2, 300 sec: 22732.8). Total num frames: 12099584. Throughput: 0: 5693.1. Samples: 1007322. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
|
2392 |
+
[2025-02-11 17:25:10,242][02117] Avg episode reward: [(0, '29.907')]
|
2393 |
+
[2025-02-11 17:25:10,415][12740] Updated weights for policy 0, policy_version 2955 (0.0013)
|
2394 |
+
[2025-02-11 17:25:12,321][12740] Updated weights for policy 0, policy_version 2965 (0.0013)
|
2395 |
+
[2025-02-11 17:25:14,123][12740] Updated weights for policy 0, policy_version 2975 (0.0012)
|
2396 |
+
[2025-02-11 17:25:15,240][02117] Fps is (10 sec: 22527.6, 60 sec: 22732.7, 300 sec: 22716.2). Total num frames: 12210176. Throughput: 0: 5668.4. Samples: 1040742. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0)
|
2397 |
+
[2025-02-11 17:25:15,242][02117] Avg episode reward: [(0, '31.334')]
|
2398 |
+
[2025-02-11 17:25:15,893][12740] Updated weights for policy 0, policy_version 2985 (0.0012)
|
2399 |
+
[2025-02-11 17:25:17,685][12740] Updated weights for policy 0, policy_version 2995 (0.0012)
|
2400 |
+
[2025-02-11 17:25:19,470][12740] Updated weights for policy 0, policy_version 3005 (0.0012)
|
2401 |
+
[2025-02-11 17:25:20,240][02117] Fps is (10 sec: 22528.1, 60 sec: 22732.8, 300 sec: 22722.0). Total num frames: 12324864. Throughput: 0: 5711.3. Samples: 1075414. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
|
2402 |
+
[2025-02-11 17:25:20,242][02117] Avg episode reward: [(0, '31.723')]
|
2403 |
+
[2025-02-11 17:25:21,241][12740] Updated weights for policy 0, policy_version 3015 (0.0013)
|
2404 |
+
[2025-02-11 17:25:22,996][12740] Updated weights for policy 0, policy_version 3025 (0.0012)
|
2405 |
+
[2025-02-11 17:25:24,833][12740] Updated weights for policy 0, policy_version 3035 (0.0012)
|
2406 |
+
[2025-02-11 17:25:25,241][02117] Fps is (10 sec: 22527.7, 60 sec: 22664.6, 300 sec: 22706.5). Total num frames: 12435456. Throughput: 0: 5712.5. Samples: 1092800. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
|
2407 |
+
[2025-02-11 17:25:25,242][02117] Avg episode reward: [(0, '30.617')]
|
2408 |
+
[2025-02-11 17:25:26,695][12740] Updated weights for policy 0, policy_version 3045 (0.0013)
|
2409 |
+
[2025-02-11 17:25:28,486][12740] Updated weights for policy 0, policy_version 3055 (0.0013)
|
2410 |
+
[2025-02-11 17:25:30,240][02117] Fps is (10 sec: 22527.8, 60 sec: 22664.5, 300 sec: 22712.3). Total num frames: 12550144. Throughput: 0: 5692.4. Samples: 1126408. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
|
2411 |
+
[2025-02-11 17:25:30,242][02117] Avg episode reward: [(0, '29.658')]
|
2412 |
+
[2025-02-11 17:25:30,275][12740] Updated weights for policy 0, policy_version 3065 (0.0012)
|
2413 |
+
[2025-02-11 17:25:32,061][12740] Updated weights for policy 0, policy_version 3075 (0.0012)
|
2414 |
+
[2025-02-11 17:25:33,865][12740] Updated weights for policy 0, policy_version 3085 (0.0012)
|
2415 |
+
[2025-02-11 17:25:35,240][02117] Fps is (10 sec: 22938.4, 60 sec: 22801.1, 300 sec: 22717.8). Total num frames: 12664832. Throughput: 0: 5708.3. Samples: 1160666. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
|
2416 |
+
[2025-02-11 17:25:35,243][02117] Avg episode reward: [(0, '33.813')]
|
2417 |
+
[2025-02-11 17:25:35,621][12740] Updated weights for policy 0, policy_version 3095 (0.0012)
|
2418 |
+
[2025-02-11 17:25:37,489][12740] Updated weights for policy 0, policy_version 3105 (0.0013)
[2025-02-11 17:25:39,395][12740] Updated weights for policy 0, policy_version 3115 (0.0013)
[2025-02-11 17:25:40,240][02117] Fps is (10 sec: 22528.0, 60 sec: 22732.8, 300 sec: 22703.5). Total num frames: 12775424. Throughput: 0: 5698.6. Samples: 1177612. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
[2025-02-11 17:25:40,243][02117] Avg episode reward: [(0, '29.854')]
[2025-02-11 17:25:41,147][12740] Updated weights for policy 0, policy_version 3125 (0.0012)
[2025-02-11 17:25:42,928][12740] Updated weights for policy 0, policy_version 3135 (0.0013)
[2025-02-11 17:25:44,707][12740] Updated weights for policy 0, policy_version 3145 (0.0012)
[2025-02-11 17:25:45,240][02117] Fps is (10 sec: 22528.0, 60 sec: 22732.8, 300 sec: 22709.0). Total num frames: 12890112. Throughput: 0: 5686.0. Samples: 1211542. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
[2025-02-11 17:25:45,243][02117] Avg episode reward: [(0, '29.512')]
[2025-02-11 17:25:46,475][12740] Updated weights for policy 0, policy_version 3155 (0.0012)
[2025-02-11 17:25:48,247][12740] Updated weights for policy 0, policy_version 3165 (0.0012)
[2025-02-11 17:25:50,065][12740] Updated weights for policy 0, policy_version 3175 (0.0013)
[2025-02-11 17:25:50,240][02117] Fps is (10 sec: 22937.3, 60 sec: 22801.0, 300 sec: 22714.2). Total num frames: 13004800. Throughput: 0: 5692.1. Samples: 1246006. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
[2025-02-11 17:25:50,243][02117] Avg episode reward: [(0, '29.450')]
[2025-02-11 17:25:52,009][12740] Updated weights for policy 0, policy_version 3185 (0.0013)
[2025-02-11 17:25:53,834][12740] Updated weights for policy 0, policy_version 3195 (0.0012)
[2025-02-11 17:25:55,240][02117] Fps is (10 sec: 22527.9, 60 sec: 22664.5, 300 sec: 22700.9). Total num frames: 13115392. Throughput: 0: 5665.2. Samples: 1262258. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
[2025-02-11 17:25:55,243][02117] Avg episode reward: [(0, '30.317')]
[2025-02-11 17:25:55,613][12740] Updated weights for policy 0, policy_version 3205 (0.0012)
[2025-02-11 17:25:57,381][12740] Updated weights for policy 0, policy_version 3215 (0.0012)
[2025-02-11 17:25:59,144][12740] Updated weights for policy 0, policy_version 3225 (0.0012)
[2025-02-11 17:26:00,240][02117] Fps is (10 sec: 22937.7, 60 sec: 22801.0, 300 sec: 22723.9). Total num frames: 13234176. Throughput: 0: 5687.3. Samples: 1296670. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
[2025-02-11 17:26:00,243][02117] Avg episode reward: [(0, '31.051')]
[2025-02-11 17:26:00,250][12725] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000003231_13234176.pth...
[2025-02-11 17:26:00,322][12725] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001955_8007680.pth
[2025-02-11 17:26:00,907][12740] Updated weights for policy 0, policy_version 3235 (0.0012)
[2025-02-11 17:26:02,715][12740] Updated weights for policy 0, policy_version 3245 (0.0012)
[2025-02-11 17:26:04,662][12740] Updated weights for policy 0, policy_version 3255 (0.0012)
[2025-02-11 17:26:05,240][02117] Fps is (10 sec: 22937.7, 60 sec: 22664.5, 300 sec: 22711.0). Total num frames: 13344768. Throughput: 0: 5667.4. Samples: 1330446. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
[2025-02-11 17:26:05,244][02117] Avg episode reward: [(0, '30.771')]
[2025-02-11 17:26:06,490][12740] Updated weights for policy 0, policy_version 3265 (0.0012)
[2025-02-11 17:26:08,270][12740] Updated weights for policy 0, policy_version 3275 (0.0012)
[2025-02-11 17:26:10,053][12740] Updated weights for policy 0, policy_version 3285 (0.0012)
[2025-02-11 17:26:10,240][02117] Fps is (10 sec: 22118.5, 60 sec: 22596.3, 300 sec: 22698.7). Total num frames: 13455360. Throughput: 0: 5655.2. Samples: 1347282. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0)
[2025-02-11 17:26:10,243][02117] Avg episode reward: [(0, '33.388')]
[2025-02-11 17:26:11,856][12740] Updated weights for policy 0, policy_version 3295 (0.0012)
[2025-02-11 17:26:13,642][12740] Updated weights for policy 0, policy_version 3305 (0.0012)
[2025-02-11 17:26:15,240][02117] Fps is (10 sec: 22937.7, 60 sec: 22732.9, 300 sec: 22720.3). Total num frames: 13574144. Throughput: 0: 5672.9. Samples: 1381688. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
[2025-02-11 17:26:15,243][02117] Avg episode reward: [(0, '28.598')]
[2025-02-11 17:26:15,433][12740] Updated weights for policy 0, policy_version 3315 (0.0012)
[2025-02-11 17:26:17,330][12740] Updated weights for policy 0, policy_version 3325 (0.0012)
[2025-02-11 17:26:19,162][12740] Updated weights for policy 0, policy_version 3335 (0.0012)
[2025-02-11 17:26:20,240][02117] Fps is (10 sec: 22937.8, 60 sec: 22664.5, 300 sec: 22708.2). Total num frames: 13684736. Throughput: 0: 5653.9. Samples: 1415090. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
[2025-02-11 17:26:20,243][02117] Avg episode reward: [(0, '30.308')]
[2025-02-11 17:26:20,912][12740] Updated weights for policy 0, policy_version 3345 (0.0012)
[2025-02-11 17:26:22,667][12740] Updated weights for policy 0, policy_version 3355 (0.0012)
[2025-02-11 17:26:24,410][12740] Updated weights for policy 0, policy_version 3365 (0.0012)
[2025-02-11 17:26:25,240][02117] Fps is (10 sec: 22528.0, 60 sec: 22732.9, 300 sec: 22712.7). Total num frames: 13799424. Throughput: 0: 5664.1. Samples: 1432498. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0)
[2025-02-11 17:26:25,242][02117] Avg episode reward: [(0, '32.269')]
[2025-02-11 17:26:26,182][12740] Updated weights for policy 0, policy_version 3375 (0.0013)
[2025-02-11 17:26:27,933][12740] Updated weights for policy 0, policy_version 3385 (0.0012)
[2025-02-11 17:26:29,789][12740] Updated weights for policy 0, policy_version 3395 (0.0012)
[2025-02-11 17:26:30,240][02117] Fps is (10 sec: 22937.4, 60 sec: 22732.8, 300 sec: 22717.0). Total num frames: 13914112. Throughput: 0: 5687.1. Samples: 1467462. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
[2025-02-11 17:26:30,243][02117] Avg episode reward: [(0, '33.848')]
[2025-02-11 17:26:31,678][12740] Updated weights for policy 0, policy_version 3405 (0.0012)
[2025-02-11 17:26:33,456][12740] Updated weights for policy 0, policy_version 3415 (0.0012)
[2025-02-11 17:26:35,236][12740] Updated weights for policy 0, policy_version 3425 (0.0012)
[2025-02-11 17:26:35,240][02117] Fps is (10 sec: 22937.3, 60 sec: 22732.8, 300 sec: 22721.2). Total num frames: 14028800. Throughput: 0: 5664.5. Samples: 1500910. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
[2025-02-11 17:26:35,243][02117] Avg episode reward: [(0, '29.624')]
[2025-02-11 17:26:36,996][12740] Updated weights for policy 0, policy_version 3435 (0.0012)
[2025-02-11 17:26:38,774][12740] Updated weights for policy 0, policy_version 3445 (0.0012)
[2025-02-11 17:26:40,240][02117] Fps is (10 sec: 22937.6, 60 sec: 22801.1, 300 sec: 22725.2). Total num frames: 14143488. Throughput: 0: 5690.7. Samples: 1518338. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0)
[2025-02-11 17:26:40,243][02117] Avg episode reward: [(0, '34.344')]
[2025-02-11 17:26:40,551][12740] Updated weights for policy 0, policy_version 3455 (0.0012)
[2025-02-11 17:26:42,362][12740] Updated weights for policy 0, policy_version 3465 (0.0012)
[2025-02-11 17:26:44,232][12740] Updated weights for policy 0, policy_version 3475 (0.0012)
[2025-02-11 17:26:45,240][02117] Fps is (10 sec: 22528.3, 60 sec: 22732.8, 300 sec: 22714.2). Total num frames: 14254080. Throughput: 0: 5681.2. Samples: 1552324. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
[2025-02-11 17:26:45,243][02117] Avg episode reward: [(0, '32.705')]
[2025-02-11 17:26:46,061][12740] Updated weights for policy 0, policy_version 3485 (0.0012)
[2025-02-11 17:26:47,815][12740] Updated weights for policy 0, policy_version 3495 (0.0012)
[2025-02-11 17:26:49,581][12740] Updated weights for policy 0, policy_version 3505 (0.0012)
[2025-02-11 17:26:50,240][02117] Fps is (10 sec: 22528.0, 60 sec: 22732.8, 300 sec: 22718.2). Total num frames: 14368768. Throughput: 0: 5692.4. Samples: 1586604. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0)
[2025-02-11 17:26:50,242][02117] Avg episode reward: [(0, '31.404')]
[2025-02-11 17:26:51,325][12740] Updated weights for policy 0, policy_version 3515 (0.0012)
[2025-02-11 17:26:53,111][12740] Updated weights for policy 0, policy_version 3525 (0.0012)
[2025-02-11 17:26:54,879][12740] Updated weights for policy 0, policy_version 3535 (0.0013)
[2025-02-11 17:26:55,240][02117] Fps is (10 sec: 23347.1, 60 sec: 22869.3, 300 sec: 22736.4). Total num frames: 14487552. Throughput: 0: 5706.8. Samples: 1604088. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
[2025-02-11 17:26:55,242][02117] Avg episode reward: [(0, '28.501')]
[2025-02-11 17:26:56,754][12740] Updated weights for policy 0, policy_version 3545 (0.0013)
[2025-02-11 17:26:58,574][12740] Updated weights for policy 0, policy_version 3555 (0.0013)
[2025-02-11 17:27:00,240][02117] Fps is (10 sec: 22937.6, 60 sec: 22732.8, 300 sec: 22725.7). Total num frames: 14598144. Throughput: 0: 5691.7. Samples: 1637816. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0)
[2025-02-11 17:27:00,242][02117] Avg episode reward: [(0, '31.883')]
[2025-02-11 17:27:00,319][12740] Updated weights for policy 0, policy_version 3565 (0.0012)
[2025-02-11 17:27:02,068][12740] Updated weights for policy 0, policy_version 3575 (0.0012)
[2025-02-11 17:27:03,830][12740] Updated weights for policy 0, policy_version 3585 (0.0012)
[2025-02-11 17:27:05,240][02117] Fps is (10 sec: 22528.2, 60 sec: 22801.1, 300 sec: 22729.3). Total num frames: 14712832. Throughput: 0: 5726.0. Samples: 1672758. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
[2025-02-11 17:27:05,243][02117] Avg episode reward: [(0, '32.349')]
[2025-02-11 17:27:05,614][12740] Updated weights for policy 0, policy_version 3595 (0.0012)
[2025-02-11 17:27:07,385][12740] Updated weights for policy 0, policy_version 3605 (0.0012)
[2025-02-11 17:27:09,256][12740] Updated weights for policy 0, policy_version 3615 (0.0013)
[2025-02-11 17:27:10,240][02117] Fps is (10 sec: 22937.6, 60 sec: 22869.3, 300 sec: 22812.6). Total num frames: 14827520. Throughput: 0: 5725.0. Samples: 1690122. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
[2025-02-11 17:27:10,242][02117] Avg episode reward: [(0, '33.127')]
[2025-02-11 17:27:11,117][12740] Updated weights for policy 0, policy_version 3625 (0.0012)
[2025-02-11 17:27:12,890][12740] Updated weights for policy 0, policy_version 3635 (0.0012)
[2025-02-11 17:27:14,646][12740] Updated weights for policy 0, policy_version 3645 (0.0012)
[2025-02-11 17:27:15,240][02117] Fps is (10 sec: 22937.4, 60 sec: 22801.0, 300 sec: 22812.6). Total num frames: 14942208. Throughput: 0: 5693.6. Samples: 1723674. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
[2025-02-11 17:27:15,242][02117] Avg episode reward: [(0, '30.411')]
[2025-02-11 17:27:16,412][12740] Updated weights for policy 0, policy_version 3655 (0.0012)
[2025-02-11 17:27:18,146][12740] Updated weights for policy 0, policy_version 3665 (0.0012)
[2025-02-11 17:27:19,912][12740] Updated weights for policy 0, policy_version 3675 (0.0012)
[2025-02-11 17:27:20,241][02117] Fps is (10 sec: 22937.1, 60 sec: 22869.2, 300 sec: 22826.5). Total num frames: 15056896. Throughput: 0: 5729.1. Samples: 1758722. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
[2025-02-11 17:27:20,243][02117] Avg episode reward: [(0, '31.541')]
[2025-02-11 17:27:21,741][12740] Updated weights for policy 0, policy_version 3685 (0.0012)
[2025-02-11 17:27:23,664][12740] Updated weights for policy 0, policy_version 3695 (0.0012)
[2025-02-11 17:27:25,240][02117] Fps is (10 sec: 22937.5, 60 sec: 22869.3, 300 sec: 22826.5). Total num frames: 15171584. Throughput: 0: 5715.0. Samples: 1775512. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
[2025-02-11 17:27:25,243][02117] Avg episode reward: [(0, '32.396')]
[2025-02-11 17:27:25,416][12740] Updated weights for policy 0, policy_version 3705 (0.0012)
[2025-02-11 17:27:27,153][12740] Updated weights for policy 0, policy_version 3715 (0.0012)
[2025-02-11 17:27:28,915][12740] Updated weights for policy 0, policy_version 3725 (0.0012)
[2025-02-11 17:27:30,240][02117] Fps is (10 sec: 22938.2, 60 sec: 22869.3, 300 sec: 22840.4). Total num frames: 15286272. Throughput: 0: 5725.2. Samples: 1809958. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0)
[2025-02-11 17:27:30,243][02117] Avg episode reward: [(0, '32.554')]
[2025-02-11 17:27:30,661][12740] Updated weights for policy 0, policy_version 3735 (0.0012)
[2025-02-11 17:27:32,452][12740] Updated weights for policy 0, policy_version 3745 (0.0012)
[2025-02-11 17:27:34,308][12740] Updated weights for policy 0, policy_version 3755 (0.0013)
[2025-02-11 17:27:35,240][02117] Fps is (10 sec: 22528.0, 60 sec: 22801.1, 300 sec: 22812.6). Total num frames: 15396864. Throughput: 0: 5724.8. Samples: 1844220. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
[2025-02-11 17:27:35,242][02117] Avg episode reward: [(0, '31.010')]
[2025-02-11 17:27:36,253][12740] Updated weights for policy 0, policy_version 3765 (0.0013)
[2025-02-11 17:27:38,062][12740] Updated weights for policy 0, policy_version 3775 (0.0012)
[2025-02-11 17:27:39,873][12740] Updated weights for policy 0, policy_version 3785 (0.0012)
[2025-02-11 17:27:40,240][02117] Fps is (10 sec: 22528.0, 60 sec: 22801.1, 300 sec: 22812.6). Total num frames: 15511552. Throughput: 0: 5696.6. Samples: 1860434. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0)
[2025-02-11 17:27:40,242][02117] Avg episode reward: [(0, '35.659')]
[2025-02-11 17:27:40,251][12725] Saving new best policy, reward=35.659!
[2025-02-11 17:27:41,648][12740] Updated weights for policy 0, policy_version 3795 (0.0012)
[2025-02-11 17:27:43,401][12740] Updated weights for policy 0, policy_version 3805 (0.0012)
[2025-02-11 17:27:45,191][12740] Updated weights for policy 0, policy_version 3815 (0.0012)
[2025-02-11 17:27:45,240][02117] Fps is (10 sec: 22937.7, 60 sec: 22869.3, 300 sec: 22812.6). Total num frames: 15626240. Throughput: 0: 5713.2. Samples: 1894908. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0)
[2025-02-11 17:27:45,242][02117] Avg episode reward: [(0, '33.112')]
[2025-02-11 17:27:47,000][12740] Updated weights for policy 0, policy_version 3825 (0.0012)
[2025-02-11 17:27:48,914][12740] Updated weights for policy 0, policy_version 3835 (0.0013)
[2025-02-11 17:27:50,240][02117] Fps is (10 sec: 22527.9, 60 sec: 22801.1, 300 sec: 22798.7). Total num frames: 15736832. Throughput: 0: 5680.7. Samples: 1928392. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0)
[2025-02-11 17:27:50,242][02117] Avg episode reward: [(0, '35.327')]
[2025-02-11 17:27:50,726][12740] Updated weights for policy 0, policy_version 3845 (0.0012)
[2025-02-11 17:27:52,500][12740] Updated weights for policy 0, policy_version 3855 (0.0013)
[2025-02-11 17:27:54,263][12740] Updated weights for policy 0, policy_version 3865 (0.0012)
[2025-02-11 17:27:55,240][02117] Fps is (10 sec: 22528.2, 60 sec: 22732.8, 300 sec: 22798.8). Total num frames: 15851520. Throughput: 0: 5676.1. Samples: 1945548. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
[2025-02-11 17:27:55,243][02117] Avg episode reward: [(0, '34.351')]
[2025-02-11 17:27:56,028][12740] Updated weights for policy 0, policy_version 3875 (0.0012)
[2025-02-11 17:27:57,815][12740] Updated weights for policy 0, policy_version 3885 (0.0012)
[2025-02-11 17:27:59,620][12740] Updated weights for policy 0, policy_version 3895 (0.0012)
[2025-02-11 17:28:00,240][02117] Fps is (10 sec: 22937.2, 60 sec: 22801.0, 300 sec: 22798.7). Total num frames: 15966208. Throughput: 0: 5700.1. Samples: 1980180. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
[2025-02-11 17:28:00,243][02117] Avg episode reward: [(0, '33.750')]
[2025-02-11 17:28:00,250][12725] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000003898_15966208.pth...
[2025-02-11 17:28:00,325][12725] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002565_10506240.pth
[2025-02-11 17:28:01,476][12740] Updated weights for policy 0, policy_version 3905 (0.0012)
[2025-02-11 17:28:02,076][12725] Stopping Batcher_0...
[2025-02-11 17:28:02,077][12725] Loop batcher_evt_loop terminating...
[2025-02-11 17:28:02,077][12725] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000003908_16007168.pth...
[2025-02-11 17:28:02,077][02117] Component Batcher_0 stopped!
[2025-02-11 17:28:02,100][12740] Weights refcount: 2 0
[2025-02-11 17:28:02,102][12740] Stopping InferenceWorker_p0-w0...
[2025-02-11 17:28:02,102][12740] Loop inference_proc0-0_evt_loop terminating...
[2025-02-11 17:28:02,102][02117] Component InferenceWorker_p0-w0 stopped!
[2025-02-11 17:28:02,125][12750] Stopping RolloutWorker_w9...
[2025-02-11 17:28:02,125][12750] Loop rollout_proc9_evt_loop terminating...
[2025-02-11 17:28:02,125][02117] Component RolloutWorker_w9 stopped!
[2025-02-11 17:28:02,130][12744] Stopping RolloutWorker_w4...
[2025-02-11 17:28:02,131][12744] Loop rollout_proc4_evt_loop terminating...
[2025-02-11 17:28:02,131][12742] Stopping RolloutWorker_w1...
[2025-02-11 17:28:02,132][12742] Loop rollout_proc1_evt_loop terminating...
[2025-02-11 17:28:02,131][02117] Component RolloutWorker_w4 stopped!
[2025-02-11 17:28:02,132][12745] Stopping RolloutWorker_w3...
[2025-02-11 17:28:02,133][12745] Loop rollout_proc3_evt_loop terminating...
[2025-02-11 17:28:02,134][12746] Stopping RolloutWorker_w5...
[2025-02-11 17:28:02,133][02117] Component RolloutWorker_w1 stopped!
[2025-02-11 17:28:02,134][12746] Loop rollout_proc5_evt_loop terminating...
[2025-02-11 17:28:02,135][12748] Stopping RolloutWorker_w7...
[2025-02-11 17:28:02,135][12748] Loop rollout_proc7_evt_loop terminating...
[2025-02-11 17:28:02,136][12743] Stopping RolloutWorker_w2...
[2025-02-11 17:28:02,135][02117] Component RolloutWorker_w3 stopped!
[2025-02-11 17:28:02,137][12743] Loop rollout_proc2_evt_loop terminating...
[2025-02-11 17:28:02,136][02117] Component RolloutWorker_w5 stopped!
[2025-02-11 17:28:02,139][12747] Stopping RolloutWorker_w6...
[2025-02-11 17:28:02,140][12747] Loop rollout_proc6_evt_loop terminating...
[2025-02-11 17:28:02,138][02117] Component RolloutWorker_w7 stopped!
[2025-02-11 17:28:02,143][02117] Component RolloutWorker_w2 stopped!
[2025-02-11 17:28:02,145][02117] Component RolloutWorker_w6 stopped!
[2025-02-11 17:28:02,161][12725] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000003231_13234176.pth
[2025-02-11 17:28:02,164][12741] Stopping RolloutWorker_w0...
[2025-02-11 17:28:02,164][12741] Loop rollout_proc0_evt_loop terminating...
[2025-02-11 17:28:02,164][02117] Component RolloutWorker_w0 stopped!
[2025-02-11 17:28:02,166][12749] Stopping RolloutWorker_w8...
[2025-02-11 17:28:02,166][12749] Loop rollout_proc8_evt_loop terminating...
[2025-02-11 17:28:02,166][02117] Component RolloutWorker_w8 stopped!
[2025-02-11 17:28:02,172][12725] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000003908_16007168.pth...
[2025-02-11 17:28:02,295][12725] Stopping LearnerWorker_p0...
[2025-02-11 17:28:02,296][12725] Loop learner_proc0_evt_loop terminating...
[2025-02-11 17:28:02,296][02117] Component LearnerWorker_p0 stopped!
[2025-02-11 17:28:02,298][02117] Waiting for process learner_proc0 to stop...
[2025-02-11 17:28:03,257][02117] Waiting for process inference_proc0-0 to join...
[2025-02-11 17:28:03,258][02117] Waiting for process rollout_proc0 to join...
[2025-02-11 17:28:03,260][02117] Waiting for process rollout_proc1 to join...
[2025-02-11 17:28:03,261][02117] Waiting for process rollout_proc2 to join...
[2025-02-11 17:28:03,263][02117] Waiting for process rollout_proc3 to join...
[2025-02-11 17:28:03,264][02117] Waiting for process rollout_proc4 to join...
[2025-02-11 17:28:03,266][02117] Waiting for process rollout_proc5 to join...
[2025-02-11 17:28:03,267][02117] Waiting for process rollout_proc6 to join...
[2025-02-11 17:28:03,268][02117] Waiting for process rollout_proc7 to join...
[2025-02-11 17:28:03,269][02117] Waiting for process rollout_proc8 to join...
[2025-02-11 17:28:03,271][02117] Waiting for process rollout_proc9 to join...
[2025-02-11 17:28:03,272][02117] Batcher 0 profile tree view:
batching: 33.5245, releasing_batches: 0.0484
[2025-02-11 17:28:03,273][02117] InferenceWorker_p0-w0 profile tree view:
wait_policy: 0.0001
wait_policy_total: 6.2807
update_model: 6.0939
weight_update: 0.0012
one_step: 0.0030
handle_policy_step: 327.3869
deserialize: 14.1943, stack: 2.1812, obs_to_device_normalize: 81.2034, forward: 150.1038, send_messages: 25.7323
prepare_outputs: 41.1445
to_cpu: 26.4090
[2025-02-11 17:28:03,274][02117] Learner 0 profile tree view:
misc: 0.0097, prepare_batch: 18.7589
train: 45.7863
epoch_init: 0.0085, minibatch_init: 0.0109, losses_postprocess: 0.4906, kl_divergence: 0.6622, after_optimizer: 0.8413
calculate_losses: 18.1321
losses_init: 0.0059, forward_head: 1.3001, bptt_initial: 10.7448, tail: 1.1877, advantages_returns: 0.3223, losses: 2.0733
bptt: 2.1968
bptt_forward_core: 2.0972
update: 24.9648
clip: 1.4294
[2025-02-11 17:28:03,276][02117] RolloutWorker_w0 profile tree view:
wait_for_trajectories: 0.2094, enqueue_policy_requests: 14.7807, env_step: 223.1613, overhead: 9.1558, complete_rollouts: 0.3620
save_policy_outputs: 13.4449
split_output_tensors: 5.1515
[2025-02-11 17:28:03,278][02117] RolloutWorker_w9 profile tree view:
wait_for_trajectories: 0.2073, enqueue_policy_requests: 14.6800, env_step: 223.5111, overhead: 8.9706, complete_rollouts: 0.3574
save_policy_outputs: 13.4113
split_output_tensors: 5.1475
[2025-02-11 17:28:03,279][02117] Loop Runner_EvtLoop terminating...
[2025-02-11 17:28:03,280][02117] Runner profile tree view:
main_loop: 360.7447
[2025-02-11 17:28:03,282][02117] Collected {0: 16007168}, FPS: 22174.9
[2025-02-11 17:28:29,955][02117] Loading existing experiment configuration from /content/train_dir/default_experiment/config.json
[2025-02-11 17:28:29,957][02117] Overriding arg 'num_workers' with value 1 passed from command line
[2025-02-11 17:28:29,958][02117] Adding new argument 'no_render'=True that is not in the saved config file!
[2025-02-11 17:28:29,960][02117] Adding new argument 'save_video'=True that is not in the saved config file!
[2025-02-11 17:28:29,961][02117] Adding new argument 'video_frames'=1000000000.0 that is not in the saved config file!
[2025-02-11 17:28:29,963][02117] Adding new argument 'video_name'=None that is not in the saved config file!
[2025-02-11 17:28:29,964][02117] Adding new argument 'max_num_frames'=1000000000.0 that is not in the saved config file!
[2025-02-11 17:28:29,966][02117] Adding new argument 'max_num_episodes'=10 that is not in the saved config file!
[2025-02-11 17:28:29,967][02117] Adding new argument 'push_to_hub'=False that is not in the saved config file!
[2025-02-11 17:28:29,968][02117] Adding new argument 'hf_repository'=None that is not in the saved config file!
[2025-02-11 17:28:29,969][02117] Adding new argument 'policy_index'=0 that is not in the saved config file!
[2025-02-11 17:28:29,970][02117] Adding new argument 'eval_deterministic'=False that is not in the saved config file!
[2025-02-11 17:28:29,971][02117] Adding new argument 'train_script'=None that is not in the saved config file!
[2025-02-11 17:28:29,973][02117] Adding new argument 'enjoy_script'=None that is not in the saved config file!
[2025-02-11 17:28:29,974][02117] Using frameskip 1 and render_action_repeat=4 for evaluation
[2025-02-11 17:28:30,003][02117] RunningMeanStd input shape: (3, 72, 128)
[2025-02-11 17:28:30,006][02117] RunningMeanStd input shape: (1,)
[2025-02-11 17:28:30,019][02117] ConvEncoder: input_channels=3
[2025-02-11 17:28:30,058][02117] Conv encoder output size: 512
[2025-02-11 17:28:30,059][02117] Policy head output size: 512
[2025-02-11 17:28:30,081][02117] Loading state from checkpoint /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000003908_16007168.pth...
[2025-02-11 17:28:30,498][02117] Num frames 100...
[2025-02-11 17:28:30,623][02117] Num frames 200...
[2025-02-11 17:28:30,746][02117] Num frames 300...
[2025-02-11 17:28:30,870][02117] Num frames 400...
[2025-02-11 17:28:30,992][02117] Num frames 500...
[2025-02-11 17:28:31,122][02117] Num frames 600...
[2025-02-11 17:28:31,248][02117] Num frames 700...
[2025-02-11 17:28:31,371][02117] Num frames 800...
[2025-02-11 17:28:31,497][02117] Num frames 900...
[2025-02-11 17:28:31,622][02117] Num frames 1000...
[2025-02-11 17:28:31,753][02117] Num frames 1100...
[2025-02-11 17:28:31,884][02117] Num frames 1200...
[2025-02-11 17:28:32,014][02117] Num frames 1300...
[2025-02-11 17:28:32,142][02117] Num frames 1400...
[2025-02-11 17:28:32,269][02117] Num frames 1500...
[2025-02-11 17:28:32,400][02117] Num frames 1600...
[2025-02-11 17:28:32,530][02117] Num frames 1700...
[2025-02-11 17:28:32,660][02117] Num frames 1800...
[2025-02-11 17:28:32,789][02117] Num frames 1900...
[2025-02-11 17:28:32,919][02117] Num frames 2000...
[2025-02-11 17:28:33,053][02117] Num frames 2100...
[2025-02-11 17:28:33,105][02117] Avg episode rewards: #0: 55.999, true rewards: #0: 21.000
[2025-02-11 17:28:33,107][02117] Avg episode reward: 55.999, avg true_objective: 21.000
[2025-02-11 17:28:33,238][02117] Num frames 2200...
[2025-02-11 17:28:33,363][02117] Num frames 2300...
[2025-02-11 17:28:33,493][02117] Num frames 2400...
[2025-02-11 17:28:33,620][02117] Num frames 2500...
[2025-02-11 17:28:33,750][02117] Num frames 2600...
[2025-02-11 17:28:33,879][02117] Num frames 2700...
[2025-02-11 17:28:34,006][02117] Num frames 2800...
[2025-02-11 17:28:34,136][02117] Avg episode rewards: #0: 36.785, true rewards: #0: 14.285
[2025-02-11 17:28:34,138][02117] Avg episode reward: 36.785, avg true_objective: 14.285
[2025-02-11 17:28:34,194][02117] Num frames 2900...
[2025-02-11 17:28:34,321][02117] Num frames 3000...
[2025-02-11 17:28:34,459][02117] Num frames 3100...
[2025-02-11 17:28:34,587][02117] Num frames 3200...
[2025-02-11 17:28:34,714][02117] Num frames 3300...
[2025-02-11 17:28:34,862][02117] Avg episode rewards: #0: 28.910, true rewards: #0: 11.243
[2025-02-11 17:28:34,863][02117] Avg episode reward: 28.910, avg true_objective: 11.243
[2025-02-11 17:28:34,900][02117] Num frames 3400...
[2025-02-11 17:28:35,026][02117] Num frames 3500...
[2025-02-11 17:28:35,153][02117] Num frames 3600...
[2025-02-11 17:28:35,285][02117] Num frames 3700...
[2025-02-11 17:28:35,414][02117] Num frames 3800...
[2025-02-11 17:28:35,542][02117] Num frames 3900...
[2025-02-11 17:28:35,674][02117] Num frames 4000...
[2025-02-11 17:28:35,803][02117] Num frames 4100...
[2025-02-11 17:28:35,939][02117] Num frames 4200...
[2025-02-11 17:28:36,069][02117] Num frames 4300...
[2025-02-11 17:28:36,198][02117] Num frames 4400...
[2025-02-11 17:28:36,373][02117] Avg episode rewards: #0: 29.232, true rewards: #0: 11.233
[2025-02-11 17:28:36,375][02117] Avg episode reward: 29.232, avg true_objective: 11.233
[2025-02-11 17:28:36,386][02117] Num frames 4500...
[2025-02-11 17:28:36,516][02117] Num frames 4600...
[2025-02-11 17:28:36,642][02117] Num frames 4700...
[2025-02-11 17:28:36,772][02117] Num frames 4800...
[2025-02-11 17:28:36,903][02117] Num frames 4900...
[2025-02-11 17:28:37,037][02117] Num frames 5000...
[2025-02-11 17:28:37,174][02117] Num frames 5100...
[2025-02-11 17:28:37,327][02117] Num frames 5200...
[2025-02-11 17:28:37,457][02117] Num frames 5300...
[2025-02-11 17:28:37,585][02117] Num frames 5400...
[2025-02-11 17:28:37,712][02117] Num frames 5500...
[2025-02-11 17:28:37,835][02117] Num frames 5600...
[2025-02-11 17:28:37,963][02117] Num frames 5700...
[2025-02-11 17:28:38,096][02117] Num frames 5800...
[2025-02-11 17:28:38,223][02117] Num frames 5900...
[2025-02-11 17:28:38,349][02117] Num frames 6000...
[2025-02-11 17:28:38,480][02117] Num frames 6100...
[2025-02-11 17:28:38,608][02117] Num frames 6200...
[2025-02-11 17:28:38,735][02117] Num frames 6300...
[2025-02-11 17:28:38,861][02117] Num frames 6400...
[2025-02-11 17:28:39,044][02117] Avg episode rewards: #0: 34.198, true rewards: #0: 12.998
[2025-02-11 17:28:39,045][02117] Avg episode reward: 34.198, avg true_objective: 12.998
[2025-02-11 17:28:39,049][02117] Num frames 6500...
[2025-02-11 17:28:39,175][02117] Num frames 6600...
[2025-02-11 17:28:39,302][02117] Num frames 6700...
[2025-02-11 17:28:39,425][02117] Num frames 6800...
[2025-02-11 17:28:39,552][02117] Num frames 6900...
[2025-02-11 17:28:39,680][02117] Num frames 7000...
[2025-02-11 17:28:39,804][02117] Num frames 7100...
[2025-02-11 17:28:39,930][02117] Num frames 7200...
[2025-02-11 17:28:40,061][02117] Num frames 7300...
[2025-02-11 17:28:40,190][02117] Num frames 7400...
[2025-02-11 17:28:40,318][02117] Num frames 7500...
[2025-02-11 17:28:40,442][02117] Num frames 7600...
[2025-02-11 17:28:40,571][02117] Num frames 7700...
[2025-02-11 17:28:40,695][02117] Num frames 7800...
[2025-02-11 17:28:40,818][02117] Num frames 7900...
[2025-02-11 17:28:40,945][02117] Num frames 8000...
[2025-02-11 17:28:41,076][02117] Avg episode rewards: #0: 35.095, true rewards: #0: 13.428
[2025-02-11 17:28:41,077][02117] Avg episode reward: 35.095, avg true_objective: 13.428
[2025-02-11 17:28:41,133][02117] Num frames 8100...
[2025-02-11 17:28:41,259][02117] Num frames 8200...
[2025-02-11 17:28:41,386][02117] Num frames 8300...
[2025-02-11 17:28:41,514][02117] Num frames 8400...
[2025-02-11 17:28:41,643][02117] Num frames 8500...
[2025-02-11 17:28:41,768][02117] Num frames 8600...
[2025-02-11 17:28:41,894][02117] Num frames 8700...
[2025-02-11 17:28:42,027][02117] Num frames 8800...
[2025-02-11 17:28:42,152][02117] Num frames 8900...
[2025-02-11 17:28:42,278][02117] Num frames 9000...
[2025-02-11 17:28:42,405][02117] Num frames 9100...
[2025-02-11 17:28:42,559][02117] Avg episode rewards: #0: 34.253, true rewards: #0: 13.110
[2025-02-11 17:28:42,560][02117] Avg episode reward: 34.253, avg true_objective: 13.110
[2025-02-11 17:28:42,591][02117] Num frames 9200...
[2025-02-11 17:28:42,717][02117] Num frames 9300...
[2025-02-11 17:28:42,843][02117] Num frames 9400...
[2025-02-11 17:28:42,968][02117] Num frames 9500...
[2025-02-11 17:28:43,097][02117] Num frames 9600...
[2025-02-11 17:28:43,221][02117] Num frames 9700...
[2025-02-11 17:28:43,350][02117] Num frames 9800...
[2025-02-11 17:28:43,479][02117] Num frames 9900...
[2025-02-11 17:28:43,606][02117] Num frames 10000...
[2025-02-11 17:28:43,734][02117] Num frames 10100...
[2025-02-11 17:28:43,858][02117] Num frames 10200...
[2025-02-11 17:28:43,985][02117] Num frames 10300...
[2025-02-11 17:28:44,113][02117] Num frames 10400...
[2025-02-11 17:28:44,239][02117] Num frames 10500...
[2025-02-11 17:28:44,365][02117] Num frames 10600...
[2025-02-11 17:28:44,492][02117] Num frames 10700...
[2025-02-11 17:28:44,645][02117] Avg episode rewards: #0: 34.971, true rewards: #0: 13.471
[2025-02-11 17:28:44,647][02117] Avg episode reward: 34.971, avg true_objective: 13.471
[2025-02-11 17:28:44,676][02117] Num frames 10800...
[2025-02-11 17:28:44,803][02117] Num frames 10900...
[2025-02-11 17:28:44,929][02117] Num frames 11000...
[2025-02-11 17:28:45,057][02117] Num frames 11100...
[2025-02-11 17:28:45,182][02117] Num frames 11200...
[2025-02-11 17:28:45,312][02117] Num frames 11300...
[2025-02-11 17:28:45,438][02117] Num frames 11400...
[2025-02-11 17:28:45,564][02117] Num frames 11500...
[2025-02-11 17:28:45,692][02117] Num frames 11600...
[2025-02-11 17:28:45,819][02117] Num frames 11700...
[2025-02-11 17:28:45,962][02117] Avg episode rewards: #0: 33.854, true rewards: #0: 13.077
[2025-02-11 17:28:45,963][02117] Avg episode reward: 33.854, avg true_objective: 13.077
[2025-02-11 17:28:46,004][02117] Num frames 11800...
[2025-02-11 17:28:46,132][02117] Num frames 11900...
[2025-02-11 17:28:46,259][02117] Num frames 12000...
[2025-02-11 17:28:46,383][02117] Num frames 12100...
[2025-02-11 17:28:46,507][02117] Num frames 12200...
[2025-02-11 17:28:46,633][02117] Num frames 12300...
[2025-02-11 17:28:46,760][02117] Num frames 12400...
[2025-02-11 17:28:46,889][02117] Num frames 12500...
[2025-02-11 17:28:47,018][02117] Num frames 12600...
[2025-02-11 17:28:47,196][02117] Avg episode rewards: #0: 32.097, true rewards: #0: 12.697
[2025-02-11 17:28:47,198][02117] Avg episode reward: 32.097, avg true_objective: 12.697
[2025-02-11 17:28:47,203][02117] Num frames 12700...
[2025-02-11 17:29:17,288][02117] Replay video saved to /content/train_dir/default_experiment/replay.mp4!
[2025-02-11 17:30:55,450][02117] Loading existing experiment configuration from /content/train_dir/default_experiment/config.json
[2025-02-11 17:30:55,451][02117] Overriding arg 'num_workers' with value 1 passed from command line
[2025-02-11 17:30:55,453][02117] Adding new argument 'no_render'=True that is not in the saved config file!
[2025-02-11 17:30:55,454][02117] Adding new argument 'save_video'=True that is not in the saved config file!
[2025-02-11 17:30:55,456][02117] Adding new argument 'video_frames'=1000000000.0 that is not in the saved config file!
[2025-02-11 17:30:55,457][02117] Adding new argument 'video_name'=None that is not in the saved config file!
[2025-02-11 17:30:55,459][02117] Adding new argument 'max_num_frames'=100000 that is not in the saved config file!
[2025-02-11 17:30:55,460][02117] Adding new argument 'max_num_episodes'=10 that is not in the saved config file!
[2025-02-11 17:30:55,461][02117] Adding new argument 'push_to_hub'=True that is not in the saved config file!
[2025-02-11 17:30:55,463][02117] Adding new argument 'hf_repository'='mjm54/doom_health_gathering_supreme' that is not in the saved config file!
[2025-02-11 17:30:55,464][02117] Adding new argument 'policy_index'=0 that is not in the saved config file!
[2025-02-11 17:30:55,466][02117] Adding new argument 'eval_deterministic'=False that is not in the saved config file!
[2025-02-11 17:30:55,467][02117] Adding new argument 'train_script'=None that is not in the saved config file!
[2025-02-11 17:30:55,469][02117] Adding new argument 'enjoy_script'=None that is not in the saved config file!
[2025-02-11 17:30:55,470][02117] Using frameskip 1 and render_action_repeat=4 for evaluation
[2025-02-11 17:30:55,494][02117] RunningMeanStd input shape: (3, 72, 128)
[2025-02-11 17:30:55,497][02117] RunningMeanStd input shape: (1,)
[2025-02-11 17:30:55,508][02117] ConvEncoder: input_channels=3
[2025-02-11 17:30:55,543][02117] Conv encoder output size: 512
[2025-02-11 17:30:55,545][02117] Policy head output size: 512
[2025-02-11 17:30:55,564][02117] Loading state from checkpoint /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000003908_16007168.pth...
[2025-02-11 17:30:56,007][02117] Num frames 100...
[2025-02-11 17:30:56,130][02117] Num frames 200...
[2025-02-11 17:30:56,254][02117] Num frames 300...
[2025-02-11 17:30:56,381][02117] Num frames 400...
[2025-02-11 17:30:56,509][02117] Num frames 500...
[2025-02-11 17:30:56,635][02117] Num frames 600...
[2025-02-11 17:30:56,759][02117] Num frames 700...
[2025-02-11 17:30:56,886][02117] Num frames 800...
[2025-02-11 17:30:57,012][02117] Num frames 900...
[2025-02-11 17:30:57,137][02117] Num frames 1000...
[2025-02-11 17:30:57,261][02117] Num frames 1100...
[2025-02-11 17:30:57,386][02117] Num frames 1200...
[2025-02-11 17:30:57,511][02117] Num frames 1300...
[2025-02-11 17:30:57,633][02117] Num frames 1400...
[2025-02-11 17:30:57,760][02117] Num frames 1500...
[2025-02-11 17:30:57,889][02117] Num frames 1600...
[2025-02-11 17:30:58,016][02117] Num frames 1700...
[2025-02-11 17:30:58,142][02117] Num frames 1800...
[2025-02-11 17:30:58,272][02117] Num frames 1900...
[2025-02-11 17:30:58,402][02117] Num frames 2000...
[2025-02-11 17:30:58,530][02117] Num frames 2100...
[2025-02-11 17:30:58,582][02117] Avg episode rewards: #0: 54.999, true rewards: #0: 21.000
[2025-02-11 17:30:58,584][02117] Avg episode reward: 54.999, avg true_objective: 21.000
[2025-02-11 17:30:58,707][02117] Num frames 2200...
[2025-02-11 17:30:58,834][02117] Num frames 2300...
[2025-02-11 17:30:58,960][02117] Num frames 2400...
[2025-02-11 17:30:59,086][02117] Num frames 2500...
[2025-02-11 17:30:59,210][02117] Num frames 2600...
[2025-02-11 17:30:59,344][02117] Num frames 2700...
[2025-02-11 17:30:59,410][02117] Avg episode rewards: #0: 32.539, true rewards: #0: 13.540
[2025-02-11 17:30:59,412][02117] Avg episode reward: 32.539, avg true_objective: 13.540
[2025-02-11 17:30:59,537][02117] Num frames 2800...
[2025-02-11 17:30:59,676][02117] Num frames 2900...
[2025-02-11 17:30:59,811][02117] Num frames 3000...
[2025-02-11 17:30:59,948][02117] Num frames 3100...
[2025-02-11 17:31:00,086][02117] Num frames 3200...
[2025-02-11 17:31:00,228][02117] Num frames 3300...
[2025-02-11 17:31:00,364][02117] Num frames 3400...
[2025-02-11 17:31:00,495][02117] Num frames 3500...
[2025-02-11 17:31:00,627][02117] Num frames 3600...
[2025-02-11 17:31:00,761][02117] Num frames 3700...
[2025-02-11 17:31:00,897][02117] Num frames 3800...
[2025-02-11 17:31:01,022][02117] Num frames 3900...
[2025-02-11 17:31:01,147][02117] Num frames 4000...
[2025-02-11 17:31:01,304][02117] Avg episode rewards: #0: 31.613, true rewards: #0: 13.613
[2025-02-11 17:31:01,306][02117] Avg episode reward: 31.613, avg true_objective: 13.613
[2025-02-11 17:31:01,328][02117] Num frames 4100...
[2025-02-11 17:31:01,450][02117] Num frames 4200...
[2025-02-11 17:31:01,573][02117] Num frames 4300...
[2025-02-11 17:31:01,698][02117] Num frames 4400...
[2025-02-11 17:31:01,825][02117] Num frames 4500...
[2025-02-11 17:31:01,951][02117] Num frames 4600...
[2025-02-11 17:31:02,076][02117] Num frames 4700...
[2025-02-11 17:31:02,201][02117] Num frames 4800...
[2025-02-11 17:31:02,327][02117] Num frames 4900...
[2025-02-11 17:31:02,453][02117] Num frames 5000...
[2025-02-11 17:31:02,576][02117] Num frames 5100...
[2025-02-11 17:31:02,703][02117] Num frames 5200...
[2025-02-11 17:31:02,828][02117] Num frames 5300...
[2025-02-11 17:31:02,954][02117] Num frames 5400...
[2025-02-11 17:31:03,081][02117] Num frames 5500...
[2025-02-11 17:31:03,208][02117] Num frames 5600...
[2025-02-11 17:31:03,334][02117] Num frames 5700...
[2025-02-11 17:31:03,463][02117] Num frames 5800...
[2025-02-11 17:31:03,588][02117] Num frames 5900...
[2025-02-11 17:31:03,715][02117] Num frames 6000...
[2025-02-11 17:31:03,855][02117] Avg episode rewards: #0: 37.420, true rewards: #0: 15.170
[2025-02-11 17:31:03,857][02117] Avg episode reward: 37.420, avg true_objective: 15.170
[2025-02-11 17:31:03,900][02117] Num frames 6100...
[2025-02-11 17:31:04,026][02117] Num frames 6200...
[2025-02-11 17:31:04,152][02117] Num frames 6300...
[2025-02-11 17:31:04,276][02117] Num frames 6400...
[2025-02-11 17:31:04,400][02117] Num frames 6500...
[2025-02-11 17:31:04,525][02117] Num frames 6600...
[2025-02-11 17:31:04,650][02117] Num frames 6700...
[2025-02-11 17:31:04,775][02117] Num frames 6800...
[2025-02-11 17:31:04,900][02117] Num frames 6900...
[2025-02-11 17:31:05,033][02117] Num frames 7000...
[2025-02-11 17:31:05,164][02117] Num frames 7100...
[2025-02-11 17:31:05,291][02117] Num frames 7200...
[2025-02-11 17:31:05,416][02117] Num frames 7300...
[2025-02-11 17:31:05,540][02117] Num frames 7400...
[2025-02-11 17:31:05,650][02117] Avg episode rewards: #0: 37.088, true rewards: #0: 14.888
[2025-02-11 17:31:05,651][02117] Avg episode reward: 37.088, avg true_objective: 14.888
[2025-02-11 17:31:05,734][02117] Num frames 7500...
[2025-02-11 17:31:05,857][02117] Num frames 7600...
[2025-02-11 17:31:05,986][02117] Num frames 7700...
[2025-02-11 17:31:06,117][02117] Num frames 7800...
[2025-02-11 17:31:06,220][02117] Avg episode rewards: #0: 31.723, true rewards: #0: 13.057
[2025-02-11 17:31:06,221][02117] Avg episode reward: 31.723, avg true_objective: 13.057
[2025-02-11 17:31:06,307][02117] Num frames 7900...
[2025-02-11 17:31:06,434][02117] Num frames 8000...
[2025-02-11 17:31:06,556][02117] Num frames 8100...
[2025-02-11 17:31:06,683][02117] Num frames 8200...
[2025-02-11 17:31:06,809][02117] Num frames 8300...
[2025-02-11 17:31:06,935][02117] Num frames 8400...
[2025-02-11 17:31:07,059][02117] Num frames 8500...
[2025-02-11 17:31:07,186][02117] Num frames 8600...
[2025-02-11 17:31:07,312][02117] Num frames 8700...
[2025-02-11 17:31:07,402][02117] Avg episode rewards: #0: 30.325, true rewards: #0: 12.469
[2025-02-11 17:31:07,403][02117] Avg episode reward: 30.325, avg true_objective: 12.469
[2025-02-11 17:31:07,493][02117] Num frames 8800...
[2025-02-11 17:31:07,617][02117] Num frames 8900...
[2025-02-11 17:31:07,742][02117] Num frames 9000...
[2025-02-11 17:31:07,868][02117] Num frames 9100...
[2025-02-11 17:31:07,993][02117] Num frames 9200...
[2025-02-11 17:31:08,119][02117] Num frames 9300...
[2025-02-11 17:31:08,245][02117] Num frames 9400...
[2025-02-11 17:31:08,375][02117] Num frames 9500...
[2025-02-11 17:31:08,499][02117] Num frames 9600...
[2025-02-11 17:31:08,629][02117] Num frames 9700...
[2025-02-11 17:31:08,755][02117] Num frames 9800...
[2025-02-11 17:31:08,882][02117] Num frames 9900...
[2025-02-11 17:31:09,009][02117] Num frames 10000...
[2025-02-11 17:31:09,133][02117] Num frames 10100...
[2025-02-11 17:31:09,255][02117] Num frames 10200...
[2025-02-11 17:31:09,383][02117] Num frames 10300...
[2025-02-11 17:31:09,508][02117] Num frames 10400...
[2025-02-11 17:31:09,637][02117] Num frames 10500...
[2025-02-11 17:31:09,717][02117] Avg episode rewards: #0: 32.397, true rewards: #0: 13.148
[2025-02-11 17:31:09,718][02117] Avg episode reward: 32.397, avg true_objective: 13.148
[2025-02-11 17:31:09,822][02117] Num frames 10600...
[2025-02-11 17:31:09,950][02117] Num frames 10700...
[2025-02-11 17:31:10,078][02117] Num frames 10800...
[2025-02-11 17:31:10,206][02117] Num frames 10900...
[2025-02-11 17:31:10,331][02117] Num frames 11000...
[2025-02-11 17:31:10,459][02117] Num frames 11100...
[2025-02-11 17:31:10,586][02117] Num frames 11200...
[2025-02-11 17:31:10,714][02117] Num frames 11300...
[2025-02-11 17:31:10,838][02117] Num frames 11400...
[2025-02-11 17:31:10,964][02117] Num frames 11500...
[2025-02-11 17:31:11,091][02117] Num frames 11600...
[2025-02-11 17:31:11,219][02117] Num frames 11700...
[2025-02-11 17:31:11,346][02117] Num frames 11800...
[2025-02-11 17:31:11,474][02117] Num frames 11900...
[2025-02-11 17:31:11,600][02117] Num frames 12000...
[2025-02-11 17:31:11,725][02117] Num frames 12100...
[2025-02-11 17:31:11,854][02117] Num frames 12200...
[2025-02-11 17:31:11,983][02117] Num frames 12300...
[2025-02-11 17:31:12,111][02117] Num frames 12400...
[2025-02-11 17:31:12,237][02117] Num frames 12500...
[2025-02-11 17:31:12,365][02117] Num frames 12600...
[2025-02-11 17:31:12,443][02117] Avg episode rewards: #0: 35.686, true rewards: #0: 14.020
[2025-02-11 17:31:12,444][02117] Avg episode reward: 35.686, avg true_objective: 14.020
[2025-02-11 17:31:12,546][02117] Num frames 12700...
[2025-02-11 17:31:12,673][02117] Num frames 12800...
[2025-02-11 17:31:12,798][02117] Num frames 12900...
[2025-02-11 17:31:12,864][02117] Avg episode rewards: #0: 32.408, true rewards: #0: 12.908
[2025-02-11 17:31:12,866][02117] Avg episode reward: 32.408, avg true_objective: 12.908
[2025-02-11 17:31:43,232][02117] Replay video saved to /content/train_dir/default_experiment/replay.mp4!