LizardAPN
/

rl_course_vizdoom_health_gathering_supreme

@@ -15,7 +15,7 @@ model-index:
       type: doom_health_gathering_supreme
     metrics:
     - type: mean_reward
-      value: 3.81 +/- 0.29
       name: mean_reward
       verified: false
 ---

       type: doom_health_gathering_supreme
     metrics:
     - type: mean_reward
+      value: 3.97 +/- 0.86
       name: mean_reward
       verified: false
 ---

replay.mp4 CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8fc314b82523aac9ec00b023c4111c969fe9f679b8f19156b23d6e4654bb44df
-size 5533409

 version https://git-lfs.github.com/spec/v1
+oid sha256:a549007a1480ea6dd428a375c9b56de16c0ca8c8ffd505c0e983c3953b63f755
+size 6158905

sf_log.txt CHANGED Viewed

@@ -1696,3 +1696,85 @@ main_loop: 1200.1674
 [2025-08-17 21:19:25,200][08154] Avg episode rewards: #0: 4.013, true rewards: #0: 3.813
 [2025-08-17 21:19:25,201][08154] Avg episode reward: 4.013, avg true_objective: 3.813
 [2025-08-17 21:19:30,114][08154] Replay video saved to /home/dmin/HuggingFace/notebooks/unit8/part_2/train_dir/default_experiment/replay.mp4!

 [2025-08-17 21:19:25,200][08154] Avg episode rewards: #0: 4.013, true rewards: #0: 3.813
 [2025-08-17 21:19:25,201][08154] Avg episode reward: 4.013, avg true_objective: 3.813
 [2025-08-17 21:19:30,114][08154] Replay video saved to /home/dmin/HuggingFace/notebooks/unit8/part_2/train_dir/default_experiment/replay.mp4!
+[2025-08-17 21:19:35,417][08154] The model has been pushed to https://huggingface.co/LizardAPN/rl_course_vizdoom_health_gathering_supreme
+[2025-08-17 21:20:16,176][08154] Loading existing experiment configuration from /home/dmin/HuggingFace/notebooks/unit8/part_2/train_dir/default_experiment/config.json
+[2025-08-17 21:20:16,177][08154] Overriding arg 'num_workers' with value 4 passed from command line
+[2025-08-17 21:20:16,178][08154] Adding new argument 'no_render'=True that is not in the saved config file!
+[2025-08-17 21:20:16,179][08154] Adding new argument 'save_video'=True that is not in the saved config file!
+[2025-08-17 21:20:16,180][08154] Adding new argument 'video_frames'=1000000000.0 that is not in the saved config file!
+[2025-08-17 21:20:16,181][08154] Adding new argument 'video_name'=None that is not in the saved config file!
+[2025-08-17 21:20:16,182][08154] Adding new argument 'max_num_frames'=100000 that is not in the saved config file!
+[2025-08-17 21:20:16,183][08154] Adding new argument 'max_num_episodes'=10 that is not in the saved config file!
+[2025-08-17 21:20:16,184][08154] Adding new argument 'push_to_hub'=True that is not in the saved config file!
+[2025-08-17 21:20:16,185][08154] Adding new argument 'hf_repository'='LizardAPN/rl_course_vizdoom_health_gathering_supreme' that is not in the saved config file!
+[2025-08-17 21:20:16,186][08154] Adding new argument 'policy_index'=0 that is not in the saved config file!
+[2025-08-17 21:20:16,186][08154] Adding new argument 'eval_deterministic'=False that is not in the saved config file!
+[2025-08-17 21:20:16,188][08154] Adding new argument 'train_script'=None that is not in the saved config file!
+[2025-08-17 21:20:16,189][08154] Adding new argument 'enjoy_script'=None that is not in the saved config file!
+[2025-08-17 21:20:16,190][08154] Using frameskip 1 and render_action_repeat=4 for evaluation
+[2025-08-17 21:20:16,211][08154] RunningMeanStd input shape: (3, 72, 128)
+[2025-08-17 21:20:16,213][08154] RunningMeanStd input shape: (1,)
+[2025-08-17 21:20:16,245][08154] ConvEncoder: input_channels=3
+[2025-08-17 21:20:16,294][08154] Conv encoder output size: 512
+[2025-08-17 21:20:16,295][08154] Policy head output size: 512
+[2025-08-17 21:20:16,330][08154] Loading state from checkpoint /home/dmin/HuggingFace/notebooks/unit8/part_2/train_dir/default_experiment/checkpoint_p0/checkpoint_000000000_0.pth...
+[2025-08-17 21:20:16,820][08154] Num frames 100...
+[2025-08-17 21:20:17,032][08154] Num frames 200...
+[2025-08-17 21:20:17,186][08154] Num frames 300...
+[2025-08-17 21:20:17,379][08154] Avg episode rewards: #0: 3.840, true rewards: #0: 3.840
+[2025-08-17 21:20:17,381][08154] Avg episode reward: 3.840, avg true_objective: 3.840
+[2025-08-17 21:20:17,408][08154] Num frames 400...
+[2025-08-17 21:20:17,610][08154] Num frames 500...
+[2025-08-17 21:20:17,779][08154] Num frames 600...
+[2025-08-17 21:20:17,942][08154] Num frames 700...
+[2025-08-17 21:20:18,102][08154] Num frames 800...
+[2025-08-17 21:20:18,211][08154] Avg episode rewards: #0: 4.660, true rewards: #0: 4.160
+[2025-08-17 21:20:18,212][08154] Avg episode reward: 4.660, avg true_objective: 4.160
+[2025-08-17 21:20:18,336][08154] Num frames 900...
+[2025-08-17 21:20:18,516][08154] Num frames 1000...
+[2025-08-17 21:20:18,679][08154] Num frames 1100...
+[2025-08-17 21:20:18,846][08154] Num frames 1200...
+[2025-08-17 21:20:18,928][08154] Avg episode rewards: #0: 4.387, true rewards: #0: 4.053
+[2025-08-17 21:20:18,929][08154] Avg episode reward: 4.387, avg true_objective: 4.053
+[2025-08-17 21:20:19,095][08154] Num frames 1300...
+[2025-08-17 21:20:19,298][08154] Num frames 1400...
+[2025-08-17 21:20:19,499][08154] Num frames 1500...
+[2025-08-17 21:20:19,669][08154] Num frames 1600...
+[2025-08-17 21:20:19,855][08154] Num frames 1700...
+[2025-08-17 21:20:20,021][08154] Avg episode rewards: #0: 5.150, true rewards: #0: 4.400
+[2025-08-17 21:20:20,022][08154] Avg episode reward: 5.150, avg true_objective: 4.400
+[2025-08-17 21:20:20,107][08154] Num frames 1800...
+[2025-08-17 21:20:20,299][08154] Num frames 1900...
+[2025-08-17 21:20:20,495][08154] Num frames 2000...
+[2025-08-17 21:20:20,584][08154] Avg episode rewards: #0: 4.632, true rewards: #0: 4.032
+[2025-08-17 21:20:20,585][08154] Avg episode reward: 4.632, avg true_objective: 4.032
+[2025-08-17 21:20:20,732][08154] Num frames 2100...
+[2025-08-17 21:20:20,918][08154] Num frames 2200...
+[2025-08-17 21:20:21,101][08154] Avg episode rewards: #0: 4.287, true rewards: #0: 3.787
+[2025-08-17 21:20:21,102][08154] Avg episode reward: 4.287, avg true_objective: 3.787
+[2025-08-17 21:20:21,157][08154] Num frames 2300...
+[2025-08-17 21:20:21,334][08154] Num frames 2400...
+[2025-08-17 21:20:21,506][08154] Num frames 2500...
+[2025-08-17 21:20:21,685][08154] Num frames 2600...
+[2025-08-17 21:20:21,860][08154] Num frames 2700...
+[2025-08-17 21:20:21,951][08154] Avg episode rewards: #0: 4.457, true rewards: #0: 3.886
+[2025-08-17 21:20:21,951][08154] Avg episode reward: 4.457, avg true_objective: 3.886
+[2025-08-17 21:20:22,085][08154] Num frames 2800...
+[2025-08-17 21:20:22,259][08154] Num frames 2900...
+[2025-08-17 21:20:22,430][08154] Num frames 3000...
+[2025-08-17 21:20:22,631][08154] Num frames 3100...
+[2025-08-17 21:20:22,695][08154] Avg episode rewards: #0: 4.380, true rewards: #0: 3.880
+[2025-08-17 21:20:22,696][08154] Avg episode reward: 4.380, avg true_objective: 3.880
+[2025-08-17 21:20:22,891][08154] Num frames 3200...
+[2025-08-17 21:20:23,094][08154] Num frames 3300...
+[2025-08-17 21:20:23,269][08154] Num frames 3400...
+[2025-08-17 21:20:23,466][08154] Avg episode rewards: #0: 4.320, true rewards: #0: 3.876
+[2025-08-17 21:20:23,467][08154] Avg episode reward: 4.320, avg true_objective: 3.876
+[2025-08-17 21:20:23,492][08154] Num frames 3500...
+[2025-08-17 21:20:23,653][08154] Num frames 3600...
+[2025-08-17 21:20:23,813][08154] Num frames 3700...
+[2025-08-17 21:20:23,970][08154] Num frames 3800...
+[2025-08-17 21:20:24,129][08154] Num frames 3900...
+[2025-08-17 21:20:24,306][08154] Avg episode rewards: #0: 4.468, true rewards: #0: 3.968
+[2025-08-17 21:20:24,307][08154] Avg episode reward: 4.468, avg true_objective: 3.968
+[2025-08-17 21:20:29,751][08154] Replay video saved to /home/dmin/HuggingFace/notebooks/unit8/part_2/train_dir/default_experiment/replay.mp4!